From e33526834283ea238c08890c9067ce7f29ecc7bc Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Tue, 28 Mar 2017 23:17:26 -0700 Subject: [PATCH 1/9] Add aospy.examples.example_obj_lib --- aospy/__init__.py | 1 + aospy/examples/__init__.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 aospy/examples/__init__.py diff --git a/aospy/__init__.py b/aospy/__init__.py index 2467d7d..b459718 100644 --- a/aospy/__init__.py +++ b/aospy/__init__.py @@ -20,6 +20,7 @@ from . import calc from .calc import CalcInterface, Calc from .automate import submit_mult_calcs +from . import examples __all__ = ['Proj', 'Model', 'Run', 'Var', 'Units', 'Constant', 'Region', 'units', 'calc', 'constants', 'utils'] diff --git a/aospy/examples/__init__.py b/aospy/examples/__init__.py new file mode 100644 index 0000000..2c5e80c --- /dev/null +++ b/aospy/examples/__init__.py @@ -0,0 +1 @@ +from . import example_obj_lib From 050688513ab4510cb278c18c882a22614b6e4240 Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Tue, 28 Mar 2017 23:18:16 -0700 Subject: [PATCH 2/9] Update submit_mult_calcs docstring, Calc repr, & example lib formatting --- aospy/automate.py | 114 +++++++++++++++++++++++++++--- aospy/calc.py | 4 +- aospy/examples/example_obj_lib.py | 15 ++-- 3 files changed, 114 insertions(+), 19 deletions(-) diff --git a/aospy/automate.py b/aospy/automate.py index a4be820..49fc819 100644 --- a/aospy/automate.py +++ b/aospy/automate.py @@ -290,34 +290,126 @@ def _print_suite_summary(calc_suite_specs): def submit_mult_calcs(calc_suite_specs, exec_options=None): """Generate and execute all specified computations. + Once the calculations are prepped and submitted for execution, any + calculation that triggers any exception or error is skipped, and the rest + of the calculations proceed unaffected. This prevents an error in a single + calculation from crashing a large suite of calculations. 
+ Parameters ---------- calc_suite_specs : dict The specifications describing the full set of calculations to be - generated and potentially executed. + generated and potentially executed. Accepted keys and their values: + + library : module or package comprising an aospy object library + The aospy object library for these calculations. + projects : list of aospy.Proj objects + The projects to permute over. + models : 'all', 'default', or list of aospy.Model objects + The models to permute over. If 'all', use all models in the + ``models`` attribute of each ``Proj``. If 'default', use all + models in the ``default_models`` attribute of each ``Proj``. + runs : 'all', 'default', or list of aospy.Run objects + The runs to permute over. If 'all', use all runs in the + ``runs`` attribute of each ``Model``. If 'default', use all + runs in the ``default_runs`` attribute of each ``Model``. + variables : list of aospy.Var objects + The variables to be calculated. + regions : 'all' or list of aospy.Region objects + The region(s) over which any regional reductions will be performed. + If 'all', use all regions in the ``regions`` attribute of each + ``Proj``. + date_ranges : 'default' or tuple of datetime.datetime objects + The range of dates (inclusive) over which to perform calculations. + If 'default', use the ``default_start_date`` and + ``default_end_date`` attribute of each ``Run``. + output_time_intervals : {'ann', season-string, month-integer} + The sub-annual time interval over which to aggregate. + + - 'ann' : Annual mean + - season-string : E.g. 'JJA' for June-July-August + - month-integer : 1 for January, 2 for February, etc. Each one is + a separate reduction, e.g. [1, 2] would produce averages (or + other specified time reduction) over all Januaries, and + separately over all Februaries. 
+ + output_time_regional_reductions : list of reduction string identifiers + Unlike most other keys, these are not permuted over when creating + the :py:class:`aospy.Calc` objects that execute the calculations; + each :py:class:`aospy.Calc` performs all of the specified + reductions. Accepted string identifiers are: + + - Gridpoint-by-gridpoint output: + + - 'av' : Gridpoint-by-gridpoint time-average + - 'std' : Gridpoint-by-gridpoint temporal standard deviation + - 'ts' : Gridpoint-by-gridpoint time-series + + - Averages over each region specified via `region`: + + - 'reg.av', 'reg.std', 'reg.ts' : analogous to 'av', 'std', 'ts' + + output_vertical_reductions : {None, 'vert_av', 'vert_int'}, optional + How to reduce the data vertically: + + - None : no vertical reduction + - 'vert_av' : mass-weighted vertical average + - 'vert_int' : mass-weighted vertical integral + input_time_intervals : {'annual', 'monthly', 'daily', '6hr', '3hr'} + A string specifying the time resolution of the input data. + input_time_datatypes : {'inst', 'ts', 'av'} + What the time axis of the input data represents: + + - 'inst' : Timeseries of instantaneous values + - 'ts' : Timeseries of averages over the period of each time-index + - 'av' : A single value averaged over a date range + + input_vertical_datatypes : {False, 'pressure', 'sigma'}, optional + The vertical coordinate system used by the input data: + + - False : not defined vertically + - 'pressure' : pressure coordinates + - 'sigma' : hybrid sigma-pressure coordinates + + input_time_offsets : {None, dict}, optional + How to offset input data in time to correct for metadata errors + + - None : no time offset applied + - dict : e.g. ``{'hours': -3}`` to offset times by -3 hours + See :py:meth:`aospy.utils.times.apply_time_offset`. + exec_options : dict or None (default None) Options regarding how the calculations are reported, submitted, and saved. If None, default settings are used for all options. 
Currently supported options (each should be either `True` or `False`): - - prompt_verify : If True, print summary of calculations to be - performed and prompt user to confirm before submitting for - execution - - parallelize : If True, submit calculations in parallel - - write_to_tar : If True, write results of calculations to .tar files, - one for each object. These tar files have an identical directory - structures the standard output relative to their root directory, - which is specified via the `tar_direc_out` argument of each Proj - object's instantiation. + - prompt_verify : (default False) If True, print summary of + calculations to be performed and prompt user to confirm before + submitting for execution. + - parallelize : (default False) If True, submit calculations in + parallel. + - write_to_tar : (default True) If True, write results of calculations + to .tar files, one for each object. These tar files have an + identical directory structure to the standard output relative to + their root directory, which is specified via the `tar_direc_out` + argument of each Proj object's instantiation. Returns ------- - A list of the values returned by each Calc object that was executed. + A list of the return values for each :py:meth:`aospy.Calc.compute` call + that was made. If a calculation ran without error, this value is the + :py:class:`aospy.Calc` object itself, with the results of its calculations + saved in its ``data_out`` attribute. ``data_out`` is a dictionary, with + the keys being the temporal-regional reduction identifiers (e.g. 'reg.av'), + and the values being the corresponding result. + + If any error occurred during a calculation, the return value is None. Raises ------ AospyException : if the ``prompt_verify`` option is set to True and the user does not respond affirmatively to the prompt. 
+ """ if exec_options is None: exec_options = dict() diff --git a/aospy/calc.py b/aospy/calc.py index a1c4262..4ab229d 100644 --- a/aospy/calc.py +++ b/aospy/calc.py @@ -228,9 +228,9 @@ class Calc(object): def __str__(self): """String representation of the object.""" - return "Calc object: " + ', '.join( + return "" __repr__ = __str__ diff --git a/aospy/examples/example_obj_lib.py b/aospy/examples/example_obj_lib.py index 9cdcd80..340a868 100644 --- a/aospy/examples/example_obj_lib.py +++ b/aospy/examples/example_obj_lib.py @@ -1,12 +1,17 @@ """Sample aospy object library using the included example data.""" import datetime +import os +import aospy from aospy import Model, Proj, Region, Run, Var from aospy.data_loader import DictDataLoader -_file_map = {'monthly': - '../test/data/netcdf/000[4-6]0101.precip_monthly.nc'} +rootdir = os.path.join(aospy.__path__[0], 'test', 'data', 'netcdf') + + +_file_map = {'monthly': os.path.join(rootdir, + '000[4-6]0101.precip_monthly.nc')} example_run = Run( name='example_run', description=( @@ -20,10 +25,8 @@ example_model = Model( name='example_model', - grid_file_paths=( - '../test/data/netcdf/00040101.precip_monthly.nc', - '../test/data/netcdf/im.landmask.nc' - ), + grid_file_paths=(os.path.join(rootdir, '00040101.precip_monthly.nc'), + os.path.join(rootdir, 'im.landmask.nc')), runs=[example_run] ) From cbaf9f9f8729b225eb1e4abede06febcce84be4f Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Tue, 28 Mar 2017 23:20:34 -0700 Subject: [PATCH 3/9] DOC use example_obj_lib in Examples; other less major doc edits --- docs/api.rst | 7 +- docs/examples.rst | 523 +++++++++++++++++++++++++++++++------------ docs/index.rst | 46 +++- docs/overview.rst | 109 ++++----- docs/using-aospy.rst | 100 ++++----- 5 files changed, 516 insertions(+), 269 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index ccc3d41..39bd46f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,11 +1,14 @@ +.. 
_api-ref: + ############# API Reference ############# Here we provide the reference documentation for aospy's public API. If you are new to the package and/or just trying to get a feel for the -overall workflow, you are better off starting in the main -documentation sections. +overall workflow, you are better off starting in the :ref:`Overview +`, :ref:`using-aospy`, or :ref:`examples` sections of this +documentation. .. warning:: diff --git a/docs/examples.rst b/docs/examples.rst index 46a4025..165d9be 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -1,53 +1,123 @@ +.. _examples: + ######## Examples ######## -aospy comes with some `sample data files -`_, -which can be used to illustrate some of its basic features. These -files contain monthly mean time series output of two variables (the -large-scale and convective components of the total precipitation rate) -from an idealized aquaplanet climate model. A simple computation one -could seek to do from this model output would be to compute some -statistics of the total precipitation rate (large-scale plus -convective). +.. note:: + + The footnotes in this section provide scientific background to help + you understand the motivation and physical meaning of these example + calculations. They can be skipped if you are familiar already or + don't care. + +In this section, we use the `example data files +`_ +included with aospy to demonstrate the standard aospy workflow of +executing and submitting multiple calculations at once. + +These files contain timeseries of monthly averages of two variables +generated by an idealized aquaplanet climate model: [#idealized]_ +[#aquaplanet]_ + +1. Precipitation generated through gridbox-scale condensation +2. 
Precipitation generated through the model's convective + parameterization [#var-defs]_ + +Using this data that was directly outputted by our model, let's +compute two other useful quantities: (1) the total precipitation rate, +and (2) the fraction of the total precipitation rate that comes from +the convective parameterization. We'll compute the time-average over +the whole duration of the data, both at each gridpoint and aggregated +over specific regions. + +Preliminaries +------------- + +First we'll save the path to the example data in a local variable, +since we'll be using it in several places below. + +.. ipython:: python + + import os # Python built-in package for working with the operating system + import aospy + rootdir = os.path.join(aospy.__path__[0], 'test', 'data', 'netcdf') -Here's a quick summary of the included data: + +Now we'll use the fantastic `xarray +`_ package to inspect the data: .. ipython:: python - + import xarray as xr - xr.open_mfdataset('../aospy/test/data/netcdf/000[4-6]0101.precip_monthly.nc', + xr.open_mfdataset(os.path.join(rootdir, '000[4-6]0101.precip_monthly.nc'), decode_times=False) -In this particular model, the large-scale component of the precipitation rate -is called "condensation_rain" and the convective component is called -"convection_rain." +We see that, in this particular climate model, the variable names for +these two forms of precipitation are "condensation_rain" and +"convection_rain", respectively. The file also includes the +coordinate arrays ('lat', 'time', etc.) that indicate where in space +and time the data refers to. + +Now that we know where and what the data is, we'll proceed through the +workflow described in the :ref:`Using aospy ` section of +this documentation. + +Describing your data +-------------------- + +Runs and DataLoaders +==================== + +First we create an :py:class:`aospy.Run` object that stores metadata +about this simulation. 
This includes specifying where its files are +located via a :py:class:`aospy.data_loader.DataLoader` object. -Defining the simulation metadata -================================ +DataLoaders specify where your data is located and organized. Several +types of DataLoaders exist, each for a different directory and file +structure; see the :ref:`api-ref` for details. -The first step is to create an :py:class:`aospy.Run` object that -stores metadata about this simulation. This includes giving it a -name, a description, and specifying where files are located through a -DataLoader. +For our simple case, where the data comprises a single file, the +simplest DataLoader, a ``DictDataLoader``, works well. It maps your +data based on the time frequency of its output (e.g. 6 hourly, daily, +monthly, annual) to the corresponding netCDF files via a simple +dictionary: .. ipython:: python - from datetime import datetime - - from aospy import Run from aospy.data_loader import DictDataLoader - file_map = {'monthly': '../aospy/test/data/netcdf/000[4-6]0101.precip_monthly.nc'} + file_map = {'monthly': os.path.join(rootdir, '000[4-6]0101.precip_monthly.nc')} + data_loader = DictDataLoader(file_map) + +We then pass this to the `Run` constructor, along with a name for the +run and an optional description (see the :ref:`API reference +<api-ref>` for other optional arguments): + +.. ipython:: python + + from aospy import Run example_run = Run( name='example_run', - description=( - 'Control simulation of the idealized moist model' - ), - data_loader=DictDataLoader(file_map) + description='Control simulation of the idealized moist model', + data_loader=data_loader ) - -We then need to associate this ``Run`` with an :py:class:`aospy.Model` object: + +Models +====== + +Next, we create the :py:class:`aospy.Model` object that describes the +model in which the simulation was executed. One important attribute +is ``grid_file_paths``, which consists of a sequence (e.g. 
a tuple or +list) of netCDF files from which physical attributes of that model +can be found that aren't already embedded in the output netCDF files. + +For example, often the land mask that defines which gridpoints are +ocean or land is outputted to a single, standalone netCDF file, rather +than being included in the other output files. But we often need the +land mask, e.g. to define certain land-only or ocean-only regions. +This and other grid-related properties shared across all of a Model's +simulations can be found in one or more of the files in +``grid_file_paths``. .. ipython:: python @@ -55,122 +125,140 @@ We then need to associate this ``Run`` with an :py:class:`aospy.Model` object: example_model = Model( name='example_model', grid_file_paths=( - '../aospy/test/data/netcdf/00040101.precip_monthly.nc', - '../aospy/test/data/netcdf/im.landmask.nc' + os.path.join(rootdir, '00040101.precip_monthly.nc'), + os.path.join(rootdir, 'im.landmask.nc') ), - runs=[example_run] + runs=[example_run] # only one Run in our case, but could be more ) -Finally, we need to associate the ``Model`` object with an -:py:class:`aospy.Proj` object. Here we can specify the location that -aospy will save its output files. +Projects +======== + +Finally, we associate the ``Model`` object with an +:py:class:`aospy.Proj` object. This is the level at which we specify +the directories to which aospy output gets written. .. ipython:: python from aospy import Proj example_proj = Proj( 'example_proj', - direc_out='example-output', - tar_direc_out='example-tar-output', - models=(example_model,) + direc_out='example-output', # default, netCDF output (always on) + tar_direc_out='example-tar-output', # output to .tar files (optional) + models=(example_model,) # only one Model in our case, but could be more ) -Now the metadata associated with this simulation is fully defined. We -can move on to computing the total precipitation. 
-Computing the annual mean total precipitation rate -================================================== +Defining physical quantities and regions +---------------------------------------- -We can start by defining a simple -python function that computes the total precipitation from condensation and -convection rain arguments: +Having now fully specified the particular data of interest, we now +define more abstractly the physical quantities of interest and any +geographic regions over which to aggregate results. + +Physical variables +================== + +We'll first define :py:class:`aospy.Var` objects for the two variables +that we saw are directly available as model output: .. ipython:: python - def total_precipitation(condensation_rain, convection_rain): - return condensation_rain + convection_rain + from aospy import Var + + precip_largescale = Var( + name='precip_largescale', # name used by aospy + alt_names=('condensation_rain',), # its possible name(s) in your data + def_time=True, # whether or not it is defined in time + description='Precipitation generated via grid-scale condensation', + ) + precip_convective = Var( + name='precip_convective', + alt_names=('convection_rain', 'prec_conv'), + def_time=True, + description='Precipitation generated by convective parameterization', + ) + +When it comes time to load data corresponding to either of these from +one or more particular netCDF files, aospy will search for variables +matching either ``name`` or any of the names in ``alt_names``, +stopping at the first successful one. This makes the common problem +of model-specific variable names a breeze! -To hook this function into the aospy framework, we need to connect it -to an :py:class:`aospy.Var` object, as well as define the ``Var`` -objects it depends on (variables that are natively stored in model -output files). +.. warning:: + + This assumes that the name and all alternate names are unique to + that variable, i.e. 
that in none of your data do those names + actually signify something else. If that was indeed the case, + aospy can potentially grab the wrong data without issuing an error + message or warning. + +Next, we'll create functions that compute the total precipitation and +convective precipitation fraction and combine them with the above +:py:class:`aospy.Var` objects to define the new :py:class:`aospy.Var` +objects: .. ipython:: python - from aospy import Var - condensation_rain = Var( - name='condensation_rain', - alt_names=('prec_ls',), - def_time=True, - description=('condensation rain'), - ) + def total_precip(condensation_rain, convection_rain): + """Sum of large-scale and convective precipitation.""" + return condensation_rain + convection_rain - convection_rain = Var( - name='convection_rain', - alt_names=('prec_conv',), - def_time=True, - description=('convection rain'), - ) + def conv_precip_frac(precip_largescale, precip_convective): + """Fraction of total precip that is from convection parameterization.""" + total = total_precip(precip_largescale, precip_convective) + return precip_convective / total.where(total) + + + precip_total = Var( + name='precip_total', + def_time=True, + func=total_precip, + variables=(precip_largescale, precip_convective), + ) - precip = Var( - name='total_precipitation', + precip_conv_frac = Var( + name='precip_conv_frac', def_time=True, - description=('total precipitation rate'), - func=total_precipitation, - variables=(condensation_rain, convection_rain) - ) - -Here the func attribute of the precip ``Var`` object is the function -we defined, and the variables attribute is a tuple containing the -``Var`` objects the function depends on, in the order of the -function's call signature. - -If we'd like to compute the time-mean total precipitation rate from -year four to year six using aospy, we can create an -:py:class:`aospy.Calc` object. 
This is currently done through passing -an :py:class:`aospy.CalcInterface` object to a ``Calc`` object; once -created, the computation can be submitted by simply calling the -compute function of ``Calc``. - -.. ipython:: python - - from aospy import CalcInterface, Calc - calc_int = CalcInterface( - proj=example_proj, - model=example_model, - run=example_run, - var=precip, - date_range=(datetime(4, 1, 1), datetime(6, 12, 31)), - intvl_in='monthly', - dtype_in_time='ts', - intvl_out='ann', - dtype_out_time='av' + func=conv_precip_frac, + variables=(precip_largescale, precip_convective), ) - Calc(calc_int).compute() -The result is stored in a netcdf file, whose path and filename -contains metadata about where it came from: +Notice the ``func`` and ``variables`` attributes that weren't in the +prior ``Var`` constructors. These signify the function to use and the +physical quantities to pass to that function in order to compute the +quantity. -.. ipython:: python - - calc_int.path_out['av'] +.. note:: -Using xarray we can open and plot the results of the calculation: + Although ``variables`` is passed a tuple of ``Var`` objects + corresponding to the physical quantities passed to ``func``, + ``func`` should be a function whose arguments are the + :py:class:`xarray.DataArray` objects corresponding to those variables. + aospy uses the ``Var`` objects to load the DataArrays and then + passes them to the function. -.. ipython:: python + This enables you to write simple, expressive functions comprising + only the physical operations to perform (since the "data wrangling" + part has been handled already). - @savefig plot_ann_total_precipitation.png width=80% - xr.open_dataset(calc_int.path_out['av']).total_precipitation.plot() +.. 
warning:: -Computing the global annual mean total precipitation rate -========================================================= + Order matters in the tuple of :py:class:`aospy.Var` objects passed + to the ``variables`` attribute: it must match the order of the call + signature of the function passed to ``func``. -Not only does aospy enable reductions along the time dimension, it -also enables area weighted regional averages. As a simple -introduction, we'll show how to compute the global mean total -precipitation rate from this ``Run``. To do so, we'll make use of the -infrastructure defined above, and also define an -:py:class:`aospy.Region` object: + E.g. in ``precip_conv_frac`` above, if we had mistakenly done + ``variables=(precip_convective, precip_largescale)``, the + calculation would execute without error, but all of the results + would be physically wrong. + +Geographic regions +================== + +Last, we define the geographic regions over which to perform +aggregations and add them to ``example_proj``. We'll look at the +whole globe and at the Tropics: .. ipython:: python @@ -183,42 +271,203 @@ infrastructure defined above, and also define an do_land_mask=False ) -To compute the global annual mean total precipitation rate, we can now create -another ``Calc`` object: + tropics = Region( + name='tropics', + description='Global tropics, defined as 30S-30N', + lat_bounds=(-30, 30), + lon_bounds=(0, 360), + do_land_mask=False + ) + example_proj.regions = [globe, tropics] + +We now have all of the needed metadata in place. So let's start +crunching numbers! + +Submitting calculations +----------------------- + +Using :py:func:`aospy.submit_mult_calcs` +======================================== + +Having put in the legwork above of describing our data and the +physical quantities we wish to compute, we can submit our desired +calculations for execution using :py:func:`aospy.submit_mult_calcs`. 
Its sole required argument is a dictionary specifying all of the +desired parameter combinations. + +In the example below, we import and use the ``example_obj_lib`` module +that is included with aospy and whose objects are essentially +identical to the ones we've defined above. + +.. ipython:: python + + from aospy.examples import example_obj_lib as lib + + calc_suite_specs = dict( + library=lib, + projects=[lib.example_proj], + models=[lib.example_model], + runs=[lib.example_run], + variables=[lib.precip_largescale, lib.precip_convective, + lib.precip_total, lib.precip_conv_frac], + regions='all', + date_ranges='default', + output_time_intervals=['ann'], + output_time_regional_reductions=['av', 'reg.av'], + output_vertical_reductions=[None], + input_time_intervals=['monthly'], + input_time_datatypes=['ts'], + input_time_offsets=[None], + input_vertical_datatypes=[False], + ) + +:py:func:`submit_mult_calcs` also accepts a second dictionary +specifying some options regarding how we want aospy to display, +execute, and save our calculations. For the sake of this simple +demonstration, we'll suppress the prompt to confirm the calculations, +submit them in serial rather than parallel, and suppress writing +backup output to .tar files: + +.. ipython:: python + + calc_exec_options = dict(prompt_verify=False, parallelize=False, + write_to_tar=False) + +Now let's submit this for execution: + +.. ipython:: python + + from aospy import submit_mult_calcs + calcs = submit_mult_calcs(calc_suite_specs, calc_exec_options) + +This permutes over all of the parameter settings in +``calc_suite_specs``, generating and executing the resulting +calculations. In this case, it will compute all four variables and +perform annual averages, both for each gridpoint and regionally +averaged. + +Results +======= + +The result is a list of :py:class:`aospy.Calc` objects, one per +calculation. .. 
ipython:: python - calc_int = CalcInterface( - proj=example_proj, - model=example_model, - run=example_run, - var=precip, - date_range=(datetime(4, 1, 1), datetime(6, 12, 31)), - intvl_in='monthly', - dtype_in_time='ts', - intvl_out='ann', - dtype_out_time='reg.av', - region={'globe': globe} - ) - Calc(calc_int).compute() + calcs + +Each :py:class:`aospy.Calc` object includes the paths to the output +and the results of each output type. + +.. ipython:: python + + calcs[0].path_out + calcs[0].data_out + +**(S. Hill: Still need to finish up everything below this point)** + +.. note:: + + You may have noticed that ``subset_`` and ``raw_`` coordinates have + years 1678 and later, when our data was from model years 4 + through 6. This is because technical details upstream (in numpy) + limit the range of supported years to roughly 1677 to 2234. -This produces a new file, located in: + As a workaround, aospy pretends that any timeseries that starts + before the beginning of this range actually starts at the start of + this range. A solution to this problem at the xarray level is + currently under way, at which point all meaningful dates will be + supported without any workarounds or date range limits. + +Let's plot the time average at each gridcell of all four variables: .. ipython:: python - calc_int.path_out['reg.av'] + from matplotlib import pyplot as plt + + fig = plt.figure() + + for i in range(4): + ax = fig.add_subplot(2, 2, i+1) + xr.open_dataset(calcs[i].path_out['av']).to_array().plot(ax=ax) -We find that the global annual mean total precipitation rate for this -run (converting to units of mm per day) is: + @savefig plot_av.png width=100% + plt.show() + +We see that the convective precipitation dominates the total in the +Tropics, but moving poleward the gridscale condensation plays an +increasingly larger fractional role. [#ls-conv]_ + +Now let's examine the regional averages. 
We find that the global +annual mean total precipitation rate for this run (converting to units +of mm per day) is: .. ipython:: python - xr.open_dataset(calc_int.path_out['reg.av']).globe * 86400. + calcs[0].data_out['reg.av'].to_array()*86400 + +As was evident from the plots, we see that the global mean +precipitation is dominated by the convective rainfall, and the +tropical mean even more so. + +Beyond this simple example +-------------------------- + +We can re-use our object library at will to perform new calculations +or re-compute old ones. We can also add new objects. For example, +suppose we performed a new simulation in which we increased the +surface albedo or introduced a rectangular continent. All we would +have to do is create a corresponding ``Run`` object, and then we can +execute calculations for that simulation! If we had other +simulations, we would create more :py:class:`aospy.Run` instances, one +per simulation. And likewise for models, projects, variables, and +regions. + .. ipython:: python :suppress: - + from shutil import rmtree rmtree('example-output') rmtree('example-tar-output') + +.. rubric:: Footnotes + +.. [#idealized] + + An "idealized climate model" is a model that, for the sake of + computational efficiency and conceptual simplicity, omits and/or + simplifies various processes relative to how they are computed in + full, production-class models. The particular model used here is + described in `Frierson et al 2006 + `_. + +.. [#aquaplanet] + + An "aquaplanet" is simply a climate model in which the surface + is entirely ocean, i.e. there is no land. Interactions between + atmospheric and land processes are complicated, and so an + aquaplanet avoids those complications while still generating a + climate (when zonally averaged, i.e. averaged around each latitude + circle) that roughly resembles that of the real Earth. + +.. 
[#var-defs] + + Most climate models generate precipitation through two separate + pathways: (1) direct saturation of a whole gridbox, which results + in condensation and precipitation, and (2) a "convective + parameterization." The latter simulates the precipitation that, + due to subgrid-scale variability, can be expected to occur at some + fraction of the area within a gridcell, even though the cell as a + whole isn't saturated. The total precipitation is simply the sum + of these "large-scale" and "convective" components. + +.. [#ls-conv] + + This is a very common result. The gridcells of many climate models + are several hundred kilometers by several hundred kilometers in + area. In Earth's Tropics, most rainfall is generated by cumulus + towers that are much smaller than this. But in the mid-latitudes, + a phenomenon known as baroclinic instability generates much larger + eddies that can span several hundred kilometers. diff --git a/docs/index.rst b/docs/index.rst index c877362..96d9df1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,6 +3,8 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. +.. _main-page: + .. image:: aospy_logo.png :alt: aospy logo :align: center @@ -14,19 +16,27 @@ aospy: automated climate data analysis and management ##################################################### -**aospy** is an open source Python package for automating computations -that use gridded climate data (namely data stored as netCDF files) -and the management of the results of those computations. +**aospy** is an open source Python package for automating your +computations that use gridded climate and weather data (namely data +stored as netCDF files) and the management of the results of those +computations. 
+ +After you use aospy's built-in tools to describe where to find your +data, use aospy's "main" script whenever you want to fire off +calculations to be performed in parallel using the permutation of an +arbitrary number of climate models, simulations, variables to be +computed, date ranges, sub-annual-sampling, and many other parameters. +In other words, it is possible using aospy to submit and execute *all* +calculations for a particular project (e.g. paper, class project, or +thesis chapter) with a single command! -Once a user describes where their data is stored on disk using aospy's -built-in tools, they can subsequently use the provided main script at -any time to fire off calculations to be performed in parallel using -the permutation of an arbitrary number of climate models, simulations, -variables to be computed, date ranges, sub-annual-sampling, and many -other parameters. Their results get saved in a highly structured -directory format as netCDF files. +The results get saved in a highly organized directory tree as netCDF +files, making it easy to subsequently find and use the data (e.g. for +plotting) and preventing "orphan" files with vague filenames and +insufficient metadata to remember what they are and/or how they were +computed. -The eventual goal is for aospy to become the "industry standard" for +The eventual goal is for aospy to become the community standard for gridded climate data analysis and, in so doing, accelerate progress in climate science and make the results of climate research more easily reproducible and shareable. aospy relies heavily on the `xarray @@ -44,13 +54,25 @@ Documentation install api +See also +======== + +- Spencer Hill's talk on aospy (`slides + `_, + `recorded talk + `_) + at the Seventh Symposium on Advances in Modeling and Analysis Using + Python, recorded 2017 January 24 as part of the 2017 American + Meteorological Society Annual Meeting. 
+ Get in touch ============ - Troubleshooting: We are actively seeking new users and are eager to help you get started with aospy! Usage questions, bug reports, and any other correspondence are all welcome and best placed as `Issues - on the Github repo `_. + `_ on our Github repo + `_. - Contributing: We are also actively seeking new developers! Please get in touch by opening an Issue or submitting a Pull Request. diff --git a/docs/overview.rst b/docs/overview.rst index 4978619..73cb8da 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -1,79 +1,59 @@ +.. _overview: + Overview: Why aospy? ==================== -Motivations ------------ - -Climate models generally output a wide array of useful quantities, but -almost invariably not all needed quantities are directly outputted. -Even for those that are, further slicing and dicing in time and/or -space are required. Moreover, these multiple computations and -spatiotemporal reductions are needed for not just a single simulation -but across multiple models, simulations, time durations, subsets of -the annual cycle, and so on. +Use cases +--------- -Performing these computations across all of the desired parameter -combinations quickly becomes impractical without some automation. But -even once some automation is in place, the resulting plethora of data -quickly becomes unusable unless it is easily found and imbued with -sufficient metadata to describe precisely what it is and how it was -computed. +If you've ever found yourself saying or thinking anything along these +lines, then aospy may be a great tool for you: -What aospy does ---------------- +- "What is this precip01.dat file in my home directory that I vaguely + remember creating a few weeks ago? Better just delete it and re-do + the calculation to be sure." +- "I really need to analyze variable X. But the models/observational + products I'm using don't provide it. They do provide variables Y + and Z, from which I can compute quantity X." 
+- "I need to calculate quantity X from 4 different simulations + repeated in 5 different models; computing monthly, seasonal, and + annual averages and standard deviations; gridpoint-by-gridpoint and + averaged over these 10 regions. That's impractical -- I'll just do + a small subset." -aospy provides functionality that enables users to perform commonly -needed tasks in climate, weather, and related sciences as: +With aospy, you can easily resolve these issues and thereby accelerate +your research. -* Repeating a calculation across multiple simulations in a single - climate model -* Repeating a calculation across the same simulations performed in - multiple models, even if variable names or other quantities differ - among the models or simulations -* Repeating a calculation across multiple timespans, both in terms of - the start date and end date and in terms of sub-annual sampling: - e.g. annual mean, seasonal-means, monthly means. -* Computing multiple statistical (e.g. mean, standard deviation) and - physical (e.g. column integrals or averages; zonal integrals or - averages) reductions on any given computation, both on a - gridpoint-by-gridpoint basis and over an arbitrary number of - geographical regions -* Any combination of the above, plus many more! +.. _design-philosophy: Design philosophy ----------------- -Key to enabling this automation and handling of model- and -simulation-level idiosyncrasies is separating +aospy's ability to automate calculations (while properly handling +model- and simulation-level idiosyncrasies) relies on separating your +code into three distinct categories. -1. Code that describes the data that you want to work with -2. Code that specifies any physical calculations you eventually want - to perform -3. Code that specifies the set of computations the user wishes to - perform at a given time +1. Code characterizing the data you want to work with: "Where is your + data and what does it represent?" +2. 
Code describing abstract physical quantities and geographical + regions you eventually want to examine: "What things are you + generally interested in calculating?" +3. Code specifying the exact parameters of calculations you want to + perform right now: "Ok, time to actually crunch some numbers. + Exactly what all do you want to compute from your data, and how do + you want to slice and dice it?" -For (1), the user defines objects at three distinct levels: `Proj`, -`Model`, and `Run`, that specify where the data is located that you -want to work with. For (2), the user defines `Var` objects that -describe the physical quantities to be computed, including any -functions that transform one or more directly model-outputted -quantities into the ultimately desired quantity, as well as `Region` -objects that describe any geographical regions over which to perform -averages. Once these objects have been defined, the user can proceed -with (3) via a simple script that specifies any models, simulations, -physical quantities, etc. to be performed. - -The run script can be modified and re-submitted as further -calculations are desired. Similarly, new objects can be defined at -any time describing new simulations, models, or variables. More -detailed instructions are available in the "Using aospy" section. +How you'll actually interact with aospy in order to achieve each of +these three steps is described in the :ref:`using-aospy` section of +this documentation, and explicit examples using included sample data +are in the :ref:`examples` section. 
Open Science & Reproducible Research ------------------------------------ -aospy promotes `open science -`_ and `reproducible -research +This separation of your code into three categories promotes `open +science `_ and +`reproducible research `_ in multiple ways: @@ -81,7 +61,7 @@ in multiple ways: used to compute particular physical quantities, the latter can be written in a generic form that closely mimics the physical form of the particular expression. The resulting code is easier to read and - debut and therefore to share with others. + debug and therefore to share with others. - By enabling automation of calculations across an arbitrary number of parameter combinations, aospy facilitates more rigorous analyses and/or analyses encompassing a larger span of input data than would @@ -89,9 +69,6 @@ in multiple ways: - By outputting the results of calculations as netCDF files in a highly organized directory structure with lots of metadata embued within the file path, file name, and the netCDF file itself, aospy - facilitates the sharing of data with others. - - It also enhances the usability of one's own data, providing a remedy - to the familiar refrain among scientists along the lines of "What is - this data1.txt file that was created six months ago? Better just - delete it and re-do the calculations to be sure.") + facilitates the sharing of results with others (including your + future self that has forgotten the myriad details of how you + have computed things right now). diff --git a/docs/using-aospy.rst b/docs/using-aospy.rst index 859786e..7056941 100644 --- a/docs/using-aospy.rst +++ b/docs/using-aospy.rst @@ -1,20 +1,13 @@ +.. _using-aospy: + ########### Using aospy ########### This section provides a high-level summary of how to use aospy. See -the Examples section and associated Jupyter Notebook for concrete -examples. - -.. 
note:: - - There is a non-trivial amount of effort required (mainly in - creating and populating your object library, described below) - before you will be able to perform any calculations. However, once - the object library is in place, there is essentially no limit to - the number of calculations that can be performed on your data - either together at the same time or at different times. In other - words, the spinup time should be well worth it. +the :ref:`Overview ` section of this documentation for more +background information, or the :ref:`Examples` section and associated +Jupyter Notebook for concrete examples. Your aospy object library ========================= @@ -27,51 +20,52 @@ Describing your data on disk ---------------------------- aospy needs to know where the data you want to use is located on disk -and how it is organized across different simulations, models, and -projects. This involves a hierarchy of three classes, ``Proj``, -``Model``, and ``Run``. +and how it is organized across different projects, models, and model +runs (i.e. simulations). This involves a hierarchy of three classes, +:py:class:`aospy.Proj`, :py:class:`aospy.Model`, and +:py:class:`aospy.Run`. -1. ``Proj``: This represents a single project that involves analysis of - data from one or more models and simulations. +1. :py:class:`aospy.Proj`: This represents a single project that + involves analysis of data from one or more models and simulations. -2. ``Model``: This represents a single climate model, other numerical - model, observational data source, etc. +2. :py:class:`aospy.Model`: This represents a single climate model, + other numerical model, observational data source, etc. -3. ``Run``: This represents a single simulation, version of - observational data, etc. +3. :py:class:`aospy.Run`: This represents a single simulation, + version of observational data, etc. 
-So each user's object library will contain one or more ``Proj`` -objects, each of which will have one or more child ``Model`` objects, -which in turn will each have one or more child ``Run`` objects. +So each user's object library will contain one or more +:py:class:`aospy.Proj` objects, each of which will have one or more +child :py:class:`aospy.Model` objects, which in turn will each have +one or more child :py:class:`aospy.Run` objects. .. note:: Currently, the Proj-Model-Run hierarchy is rigid, in that each Run - has a parent Model, and each Model has a parent Proj. For small - projects, this can lead to a lot of boilerplate code. Work is - ongoing to relax this constraint to facilitate easier exploratory - analysis. + has a parent Model, and each Model has a parent Proj. Work is + ongoing to relax this to a more generic parent-child framework. Physical variables ------------------ -The ``Var`` class is used to represent physical variables, +The :py:class:`aospy.Var` class is used to represent physical variables, e.g. precipitation rate or potential temperature. This includes both variables which are directly available in netCDF files (e.g. they were -directly outputted by your climate model) as well as those fields that -must be computed from other variables (e.g. they weren't directly -outputted but can be computed from other variables that were -outputted). +directly outputted by your model or gridded data product) as well as +those fields that must be computed from other variables (e.g. they +weren't directly outputted but can be computed from other variables +that were outputted). Geographical regions -------------------- -The ``Region`` class is used to define geographical regions over which -quantities can be averaged (in addition to gridpoint-by-gridpoint -values). 
Like ``Var`` objects, they are more generic than the objects -of the ``Proj`` - ``Model`` - ``Run`` hierarchy, in that they -correspond to the generic physical quantities/regions rather than the -data of a particular project, model, or simulation. +The :py:class:`aospy.Region` class is used to define geographical +regions over which quantities can be averaged (in addition to +gridpoint-by-gridpoint values). Like :py:class:`aospy.Var` objects, +they are more generic than the objects of the :py:class:`aospy.Proj` - +:py:class:`aospy.Model` - :py:class:`aospy.Run` hierarchy, in that +they correspond to the generic physical quantities/regions rather than +the data of a particular project, model, or simulation. Configuring your object library =============================== @@ -82,11 +76,12 @@ Required components In order for your object library to work with the main script, it must include the following two objects: -1. ``projs`` : A container of ``Proj`` objects -2. ``variables`` : A container of ``Var`` objects +1. ``projs`` : A container of :py:class:`aospy.Proj` objects +2. ``variables`` : A container of :py:class:`aospy.Var` objects -(The ``Model``, ``Run``, and ``Region`` objects are all included -within their parent ``Proj`` objects and thus don't require analogous +(The :py:class:`aospy.Model`, :py:class:`aospy.Run`, and +:py:class:`aospy.Region` objects are all included within their parent +:py:class:`aospy.Proj` objects and thus don't require analogous top-level containers.) These must be accessible from the object library's toplevel namespace, @@ -172,11 +167,10 @@ element. E.g. ``models = ['name-of-my-model']``. .. note:: - Although the main script is the recommended way to perform - calculations, it's possible to submit calculations by other means. - For example, one could explicitly create ``Calc`` objects and call - their ``compute`` method, as is done in the example Jupyter - notebook. + The main script is the recommended way to perform calculations. 
+ Nevertheless, it's possible to submit calculations by other means, + such as by explicitly creating ``Calc`` objects and calling their + ``compute`` method. Running the main script ----------------------- @@ -225,7 +219,7 @@ calculations are generated. up interwoven with one another, leading to output that is confusing to follow. Work is ongoing to improve the logging output when the computations are parallelized. - + Finding the output ------------------ @@ -239,7 +233,8 @@ and in the directory structure within which they are saved. - File name : ``varname.intvl_out.dtype_out_time.'from_'intvl_in'_'dtype_in_time.model.run.date_range.nc`` -See the API reference documentation of ``CalcInterface`` for explanation of each of these components of the path and file name. +See the API reference documentation of ``CalcInterface`` for +explanation of each of these components of the path and file name. Under the hood -------------- @@ -249,9 +244,10 @@ a ``CalcInterface`` object. This object, in turn, is used to instantiate a ``Calc`` object. The ``Calc`` object, in turn, performs the calculation. -Unlike ``Proj``, ``Model``, ``Run``, ``Var``, and ``Region``, these +Unlike :py:class:`aospy.Proj`, :py:class:`aospy.Model`, +:py:class:`aospy.Run`, :py:class:`aospy.Var`, and ``Region``, these objects are not intended to be saved in ``.py`` files for continual re-use. Instead, they are generated as needed, perform their desired tasks, and then go away. -See the API reference documentation for further details. +See the :ref:`API refeerence ` documentation for further details. 
From a246a391127ff5f641db75d2c7cf2e69a705205f Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 13:18:57 -0700 Subject: [PATCH 4/9] Remove "data" level from tar output directory --- aospy/calc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aospy/calc.py b/aospy/calc.py index 4ab229d..410675d 100644 --- a/aospy/calc.py +++ b/aospy/calc.py @@ -238,15 +238,13 @@ def _dir_out(self): """Create string of the data directory to save individual .nc files.""" ens_label = utils.io.ens_label(self.ens_mem) return os.path.join(self.proj[0].direc_out, self.proj_str, - self.model_str, self.run_str, ens_label, - self.name) + self.model_str, self.run_str, ens_label, self.name) def _dir_tar_out(self): """Create string of the data directory to store a tar file.""" ens_label = utils.io.ens_label(self.ens_mem) - return os.path.join(self.proj[0].tar_direc_out, - self.proj_str, 'data', self.model_str, - self.run_str, ens_label) + return os.path.join(self.proj[0].tar_direc_out, self.proj_str, + self.model_str, self.run_str, ens_label) def _file_name(self, dtype_out_time, extension='nc'): """Create the name of the aospy file.""" From 57a28f51c4d864f5ed1ca73034c0547ca4d95387 Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 13:20:01 -0700 Subject: [PATCH 5/9] Finish examples section; improve submit_mult_calcs docstring --- aospy/automate.py | 13 ++- docs/examples.rst | 238 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 186 insertions(+), 65 deletions(-) diff --git a/aospy/automate.py b/aospy/automate.py index 49fc819..8b4ff17 100644 --- a/aospy/automate.py +++ b/aospy/automate.py @@ -355,8 +355,12 @@ def submit_mult_calcs(calc_suite_specs, exec_options=None): - None : no vertical reduction - 'vert_av' : mass-weighted vertical average - 'vert_int' : mass-weighted vertical integral - input_time_intervals : {'annual', 'monthly', 'daily', '6hr', '3hr'} - A string specifying the time resolution of the input data. 
+ input_time_intervals : {'annual', 'monthly', 'daily', '#hr'} + A string specifying the time resolution of the input data. In + '#hr' above, the '#' stands for a number, e.g. 3hr or 6hr, for + sub-daily output. These are the suggested specifiers, but others + may be used if they are also used by the DataLoaders for the given + Runs. input_time_datatypes : {'inst', 'ts', 'av'} What the time axis of the input data represents: @@ -387,9 +391,10 @@ def submit_mult_calcs(calc_suite_specs, exec_options=None): calculations to be performed and prompt user to confirm before submitting for execution. - parallelize : (default False) If True, submit calculations in - parallel. + parallel. This requires the `multiprocess` library, which can be + installed via `pip install multiprocess`. - write_to_tar : (default True) If True, write results of calculations - to .tar files, one for each object. These tar files have an + to .tar files, one for each :py:class:`aospy.Run` object. These tar files have an identical directory structures the standard output relative to their root directory, which is specified via the `tar_direc_out` argument of each Proj object's instantiation. diff --git a/docs/examples.rst b/docs/examples.rst index 165d9be..edd4563 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -9,7 +9,7 @@ Examples The footnotes in this section provide scientific background to help you understand the motivation and physical meaning of these example calculations. They can be skipped if you are familiar already or - don't care. + aren't interested in those details. In this section, we use the `example data files `_ @@ -17,14 +17,14 @@ included with aospy to demonstrate the standard aospy workflow of executing and submitting multiple calculations at once. 
These files contain timeseries of monthly averages of two variables -generated by an idealized aquaplanet climate model: [#idealized]_ -[#aquaplanet]_ +generated by an idealized [#idealized]_ aquaplanet [#aquaplanet]_ +climate model: 1. Precipitation generated through gridbox-scale condensation 2. Precipitation generated through the model's convective parameterization [#var-defs]_ -Using this data that was directly outputted by our model, let's +Using this data that was directly outputted by the model, let's compute two other useful quantities: (1) the total precipitation rate, and (2) the fraction of the total precipitation rate that comes from the convective parameterization. We'll compute the time-average over @@ -43,7 +43,6 @@ since we'll be using it in several places below. import aospy rootdir = os.path.join(aospy.__path__[0], 'test', 'data', 'netcdf') - Now we'll use the fantastic `xarray `_ package to inspect the data: @@ -53,10 +52,10 @@ Now we'll use the fantastic `xarray xr.open_mfdataset(os.path.join(rootdir, '000[4-6]0101.precip_monthly.nc'), decode_times=False) -We see that, in this particular climate model, the variable names for -these two forms of precipitation are "condensation_rain" and +We see that, in this particular model, the variable names for these +two forms of precipitation are "condensation_rain" and "convection_rain", respectively. The file also includes the -coordinate arrays ('lat', 'time', etc.) that indicate where in space +coordinate arrays ("lat", "time", etc.) that indicate where in space and time the data refers to. Now that we know where and what the data is, we'll proceed through the @@ -71,7 +70,7 @@ Runs and DataLoaders First we create an :py:class:`aospy.Run` object that stores metadata about this simulation. This includes specifying where its files are -located via a :py:class:`aospy.data_loader.DataLoader` object. +located via an :py:class:`aospy.data_loader.DataLoader` object. 
DataLoaders specify where your data is located and organized. Several types of DataLoaders exist, each for a different directory and file @@ -86,12 +85,11 @@ dictionary: .. ipython:: python from aospy.data_loader import DictDataLoader - file_map = {'monthly': rootdir + '/aospy/test/data/netcdf/000[4-6]0101.precip_monthly.nc'} + file_map = {'monthly': os.path.join(rootdir, '000[4-6]0101.precip_monthly.nc')} data_loader = DictDataLoader(file_map) -We then pass this to the `Run` constructor, along with a name for the -run and an optional description (see the :ref:`API reference -` for other optional arguments): +We then pass this to the :py:class:`aospy.Run` constructor, along with +a name for the run and an optional description. .. ipython:: python @@ -102,22 +100,32 @@ run and an optional description (see the :ref:`API reference data_loader=data_loader ) +.. note:: + + See the :ref:`API reference ` for other optional arguments + for this and the other core aospy objects used in this tutorial. + Models ====== -Next, We create the :py:class:`aospy.Model` object that describes the +Next, we create the :py:class:`aospy.Model` object that describes the model in which the simulation was executed. One important attribute is ``grid_file_paths``, which consists of a sequence (e.g. a tuple or -list) of netCDF files from which physical attributes of that model -can be found that aren't already embedded in the output netCDF files. +list) of paths to netCDF files from which physical attributes of that +model can be found that aren't already embedded in the output netCDF +files. For example, often the land mask that defines which gridpoints are ocean or land is outputted to a single, standalone netCDF file, rather than being included in the other output files. But we often need the -land mask, e.g. to define certain land-only or ocean-only regions. 
-This and other grid-related properties shared across all of a Model's -simulations can be found in one or more of the files in -``grid_file_paths``. +land mask, e.g. to define certain land-only or ocean-only +regions. [#land-mask]_ This and other grid-related properties shared +across all of a Model's simulations can be found in one or more of the +files in ``grid_file_paths``. + +The other important attribute is ``runs``, which is a list of the +:py:class:`aospy.Run` objects that pertain to simulations performed in +this particular model. .. ipython:: python @@ -125,8 +133,8 @@ simulations can be found in one or more of the files in example_model = Model( name='example_model', grid_file_paths=( - rootdir + '/aospy/test/data/netcdf/00040101.precip_monthly.nc', - rootdir + '/aospy/test/data/netcdf/im.landmask.nc' + os.path.join(rootdir, '00040101.precip_monthly.nc'), + os.path.join(rootdir, 'im.landmask.nc') ), runs=[example_run] # only one Run in our case, but could be more ) @@ -145,16 +153,19 @@ the directories to which aospy output gets written. 'example_proj', direc_out='example-output', # default, netCDF output (always on) tar_direc_out='example-tar-output', # output to .tar files (optional) - models=(example_model,) # only one Model in our case, but could be more + models=[example_model] # only one Model in our case, but could be more ) +This extra :py:class:`aospy.Proj` level of organization may seem like +overkill for this simple example, but it really comes in handy once +you start using aospy for more than one project. Defining physical quantities and regions ---------------------------------------- Having now fully specified the particular data of interest, we now -define more abstractly the physical quantities of interest and any -geographic regions over which to aggregate results. +define the general physical quantities of interest and any geographic +regions over which to aggregate results. 
Physical variables ================== @@ -183,7 +194,8 @@ When it comes time to load data corresponding to either of these from one or more particular netCDF files, aospy will search for variables matching either ``name`` or any of the names in ``alt_names``, stopping at the first successful one. This makes the common problem -of model-specific variable names a breeze! +of model-specific variable names a breeze: if you end up with data +with a new name for your variable, just add it to ``alt_names``. .. warning:: @@ -234,9 +246,9 @@ quantity. Although ``variables`` is passed a tuple of ``Var`` objects corresponding to the physical quantities passed to ``func``, ``func`` should be a function whose arguments are the - :py:class:`xarray.DataArray`s corresponding to those variables. - aospy uses the ``Var`` objects to load the DataArrays and then - passes them to the function. + :py:class:`xarray.DataArray` objects corresponding to those + variables. aospy uses the ``Var`` objects to load the DataArrays + and then passes them to the function. This enables you to write simple, expressive functions comprising only the physical operations to perform (since the "data wrangling" @@ -321,6 +333,9 @@ identical to the ones we've defined above. input_vertical_datatypes=[False], ) +See the :ref:`api-ref` on :py:func:`aospy.submit_mult_calcs` for more +on ``calc_suite_specs``, including accepted values for each key. + :py:func:`submit_mult_calcs` also accepts a second dictionary specifying some options regarding how we want aospy to display, execute, and save our calculations. For the sake of this simple @@ -346,6 +361,10 @@ calculation. In this case, it will compute all four variables and perform annual averages, both for each gridpoint and regionally averaged. +Although we do not show it here, this also prints logging information +to the terminal at various steps during each calculation, including +the filepaths to the netCDF files written to disk of the results. 
Results ======= @@ -357,29 +376,38 @@ simulation. calcs Each :py:class:`aospy.Calc` object includes the paths to the output -and the results of each output type. .. ipython:: python calcs[0].path_out - calcs[0].data_out -**(S. Hill: Still need to finish up everything below this point)** +and the results of each output type + +.. ipython:: python + + calcs[0].data_out .. note:: - You may have noticed that ``subset_`` and ``raw_`` coordinates have - years 1678 and later, when our data was from model years 4 - through 6. This is because technical details upstream (in numpy) - limit the range of supported years to roughly 1677 to 2234. + You may have noticed that ``subset_...`` and ``raw_...`` + coordinates have years 1678 and later, when our data was from model + years 4 through 6. This is because `technical details upstream + `_ limit the range of supported whole years to 1678-2262. As a workaround, aospy pretends that any timeseries that starts - before the beginning of this range actually starts at the start of - this range. A solution to this problem at the xarray level is - currently under way, at which point all meaningful dates will be - supported without any workarounds or date range limits. + before the beginning of this range actually starts at 1678. An + upstream fix is `currently under way + `_, at which point + all dates will be supported without this workaround. -Let's plot the time average at each gridcell of all four variables: +Gridpoint-by-gridpoint +~~~~~~~~~~~~~~~~~~~~~~ + +Let's plot (using `matplotlib `_) the time +average at each gridcell of all four variables. For demonstration +purposes, we'll load the data that was saved to disk using xarray +rather than getting it directly from the ``data_out`` attribute as +above. ..
ipython:: python @@ -387,16 +415,29 @@ Let's plot the time average at each gridcell of all four variables: fig = plt.figure() - for i in range(4): + for i, calc in enumerate(calcs): ax = fig.add_subplot(2, 2, i+1) - xr.open_dataset(calcs[i].path_out['av']).to_array().plot(ax=ax) + arr = xr.open_dataset(calc.path_out['av']).to_array() + if calc.name != precip_conv_frac.name: + arr *= 86400 # convert to units mm per day + arr.plot(ax=ax) + ax.set_title(calc.name) + ax.set_xticks(range(0, 361, 60)) + ax.set_yticks(range(-90, 91, 30)) + + plt.tight_layout() @savefig plot_av.png width=100% plt.show() -We see that the convective precipitation dominates the total in the -Tropics, but moving poleward the gridscale condensation plays an -increasingly larger fractional role. [#ls-conv]_ +We see that precipitation maximizes at the equator and has a secondary +maximum in the mid-latitudes. [#itcz]_ Also, the convective +precipitation dominates the total in the Tropics, but moving poleward +the gridscale condensation plays an increasingly larger fractional +role (note different colorscales in each panel). [#ls-conv]_ + +Regional averages +~~~~~~~~~~~~~~~~~ Now let's examine the regional averages. We find that the global annual mean total precipitation rate for this run (converting to units @@ -404,26 +445,77 @@ of mm per day) is: .. ipython:: python - calcs[0].data_out['reg.av'].to_array()*86400 + for calc in calcs: + ds = xr.open_dataset(calc.path_out['reg.av']) + if calc.name != precip_conv_frac.name: + ds *= 86400 # convert to units mm/day + print(calc.name, ds, '\n') -As was evident from the plots, we see that the global mean -precipitation is dominated by the convective rainfall, and the -tropical mean even more so. +As was evident from the plots, we see that most precipitation (80.8%) +in the tropics comes from convective rainfall, but averaged over the +globe the large-scale condensation is a more equal player (40.2% for +large-scale, 59.8% for convective). 
Beyond this simple example -------------------------- -We can re-use our object library at will to perform new calculations -or re-compute old ones. We can also add new objects. For example, -suppose we performed a new simulation in which we increased the -surface albedo or introduced a rectangular continent. All we would -have to do is create a corresponding ``Run`` object, and then we can -execute calculations for that simulation! If we had other -simulations, we would create more :py:class:`aospy.Run` instances, one -per simulation. And likewise for models, projects, variables, and -regions. - - +Scaling up +========== + +In this case, we computed time averages of four variables, both at +each gridpoint (which we'll call 1 calculation) and averaged over two +regions, yielding (4 variables)*(1 gridcell operation + (2 regions)*(1 +regional operation)) = 12 total calculations executed. Not bad, but +12 calculations is few enough that we probably could have handled them +without aospy. + +The power of aospy is that, with the infrastructure we've put in +place, we can now fire off additional calculations at any time. Some +examples: + +- Set ``output_time_regional_reductions=['ts', 'std', 'reg.ts', + 'reg.std']`` : calculate the timeseries ('ts') and standard + deviation ('std') of annual mean values at each gridpoint and for + the regional averages. +- Set ``output_time_intervals=range(1, 13)`` : average across years + for each January (1), each February (2), etc. through December + (12). [#seasonal]_ + +With these settings, the number of calculations is now (4 +variables)*(2 gridcell operations + (2 regions)*(2 regional +operations))*(12 temporal averages) = 288 calculations submitted with +a single command. + +Modifying your object library +============================= + +We can also add new objects to our object library at any time. For +example, suppose we performed a new simulation in which we modified +the formulation of the convective parameterization. 
All we would have +to do is create a corresponding :py:class:`aospy.Run` object, and then +we can execute calculations for that simulation. And likewise for +models, projects, variables, and regions. + +As a real-world example, two of aospy's developers use aospy in +their own scientific research, with multiple projects each comprising +multiple models, simulations, etc. They routinely fire off thousands +of calculations at once. And thanks to the highly organized and +metadata-rich directory structure and filenames of the aospy output +netCDF files, all of the resulting data is easy to find and use. + +Example "main" script +===================== + +Finally, aospy comes included with a "main" script for submitting +calculations that is pre-populated with the objects from the example +object library. It also comes with in-line instructions on how to use +it, whether you want to keep playing with the example library or +modify it to use on your own object library. + +It is located in the "examples" directory of your aospy installation. +Find it by typing ``python -c "import os, aospy; +print(os.path.join(aospy.__path__[0], 'examples', 'aospy_main.py'))"`` +from your terminal. .. ipython:: python :suppress: @@ -463,6 +555,22 @@ regions. whole isn't saturated. The total precipitation is simply the sum of these "large-scale" and "convective" components. +.. [#land-mask] + + In this case, the model being used is an aquaplanet, so the mask + will be simply all ocean. But this is not generally the case -- + comprehensive climate and weather models include Earth's full + continental geometry and land topography (at least as well as can + be resolved at their particular horizontal grid resolution). + +.. [#itcz] + + This equatorial rainband is called the Intertropical Convergence + Zone, or ITCZ. In this simulation, the imposed solar radiation is + fixed at Earth's annual mean value, which is symmetric about the + equator.
The ITCZ typically follows the solar radiation maximum, + hence its position in this case directly on the equator. + .. [#ls-conv] This is a very common result. The gridcells of many climate models @@ -471,3 +579,11 @@ regions. towers that are much smaller than this. But in the mid-latitudes, a phenomenon known as baroclinic instability generates much larger eddies that can span several hundred kilometers. + +.. [#seasonal] + + In this particular simulation, the boundary conditions are constant + in time, so there is no seasonal cycle. But we could use these + monthly averages to confirm that's actually the case, i.e. that we + didn't accidentally use time-varying solar radiation when we ran + the model. From 4699b16341a206366957cc346a321ddfd44bdb7d Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 13:20:28 -0700 Subject: [PATCH 6/9] Formatting improvements to API and Using aospy --- docs/api.rst | 65 ++++++++++++++++++++++---------------------- docs/using-aospy.rst | 24 ++++++++-------- 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 39bd46f..e8e0072 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -21,8 +21,8 @@ Core Hierarchy for Input Data aospy provides three classes for specifying the location and characteristics of data saved on disk as netCDF files that the user -wishes to use as input data for aospy calculations: ``Proj``, -``Model``, and ``Run``. +wishes to use as input data for aospy calculations: :py:class:`Proj`, +:py:class:`Model`, and :py:class:`Run`. Proj ---- @@ -54,15 +54,15 @@ Run DataLoaders =========== -``Run`` objects rely on a helper "data loader" to specify how to find +:py:class:`Run` objects rely on a helper "data loader" to specify how to find their underlying data that is saved on disk. 
This mapping of variables, time ranges, and potentially other parameters to the location of the corresponding data on disk can differ among modeling centers or even between different models at the same center. -Currently supported data loader types are ``DictDataLoader``, -``NestedDictDataLoader``, and ``GFDLDataLoader`` Each of these inherit -from the abstract base ``DataLoader`` class. +Currently supported data loader types are :py:class:`DictDataLoader`, +:py:class:`NestedDictDataLoader`, and :py:class:`GFDLDataLoader`. Each of these inherits +from the abstract base :py:class:`DataLoader` class. .. autoclass:: aospy.data_loader.DataLoader :members: @@ -91,14 +91,14 @@ from the abstract base ``DataLoader`` class. Variables and Regions ===================== -The ``Var`` and ``Region`` classes are used to represent, +The :py:class:`Var` and :py:class:`Region` classes are used to represent, respectively, physical quantities the user wishes to be able to compute and geographical regions over which the user wishes to aggregate their calculations. -Whereas the ``Proj`` - ``Model`` - ``Run`` hierarchy is used to -describe the data resulting from particular model simulations, ``Var`` -and ``Region`` represent the properties of generic physical entities +Whereas the :py:class:`Proj` - :py:class:`Model` - :py:class:`Run` hierarchy is used to +describe the data resulting from particular model simulations, :py:class:`Var` +and :py:class:`Region` represent the properties of generic physical entities that do not depend on the underlying data.
Var @@ -122,24 +122,24 @@ Region Calculations ============ -``Calc`` is the engine that combines the user's specifications of (1) -the data on disk via ``Proj``, ``Model``, and ``Run``, (2) the -physical quantity to compute and regions to aggregate over via ``Var`` -and ``Region``, and (3) the desired date range, time reduction method, +:py:class:`Calc` is the engine that combines the user's specifications of (1) +the data on disk via :py:class:`Proj`, :py:class:`Model`, and :py:class:`Run`, (2) the +physical quantity to compute and regions to aggregate over via :py:class:`Var` +and :py:class:`Region`, and (3) the desired date range, time reduction method, and other characteristics to actually perform the calculation -Whereas ``Proj``, ``Model``, ``Run``, ``Var``, and ``Region`` are all -intended to be saved in ``.py`` files for reuse, ``Calc`` objects are +Whereas :py:class:`Proj`, :py:class:`Model`, :py:class:`Run`, :py:class:`Var`, and :py:class:`Region` are all +intended to be saved in ``.py`` files for reuse, :py:class:`Calc` objects are intended to be generated dynamically by a main script and then not retained after they have written their outputs to disk following the user's specifications. Moreover, if the ``main.py`` script is used to execute calculations, -no direct interfacing with ``Calc`` or it's helper class, -``CalcInterface`` is required by the user, in which case this section +no direct interfacing with :py:class:`Calc` or its helper class, +:py:class:`CalcInterface` is required by the user, in which case this section should be skipped entirely. -Also included is the ``automate`` module, which enables aospy e.g. in +Also included is the :py:mod:`automate` module, which enables aospy e.g. in the main script to find objects in the user's object library that the user specifies via their string names rather than having to import the objects themselves. @@ -171,7 +171,7 @@ operator ..
warning:: - The ``operator`` module is in the process of being re-vamped and + The :py:class:`operator` module is in the process of being re-vamped and is therefore currently not supported. .. automodule:: aospy.operator @@ -181,21 +181,22 @@ operator Units and Constants =================== -aospy provides the classes ``Constant`` and ``Units`` for -representing, respectively, physical constants (e.g. Earth's +aospy provides the classes :py:class:`Constant` and :py:class:`Units` +for representing, respectively, physical constants (e.g. Earth's gravitational acceleration at the surface = 9.81 m/s^2) and physical units (e.g. meters per second squared in that example). aospy comes with several commonly used constants saved within the -``constants`` module in which the ``Constant`` class is also defined. -In contrast, there are no pre-defined ``Units`` objects; the user must -define any ``Units`` objects they wish to use (e.g. to populate the -``units`` attribute of their ``Var`` objects). +:py:class:`constants` module in which the :py:class:`Constant` class +is also defined. In contrast, there are no pre-defined +:py:class:`Units` objects; the user must define any :py:class:`Units` +objects they wish to use (e.g. to populate the :py:class:`units` +attribute of their :py:class:`Var` objects). -Similarly, whereas these baked-in ``Constant`` objects are used by -aospy in various places, aospy currently does not actually use the -``Var.units`` attribute or the ``Units`` class more generally; they -are for the user's own informational purposes. +Similarly, whereas these baked-in :py:class:`Constant` objects are +used by aospy in various places, aospy currently does not actually use +the ``Var.units`` attribute or the :py:class:`Units` class more +generally; they are for the user's own informational purposes. constants --------- @@ -215,8 +216,8 @@ units There has been discussion of implementing units-handling upstream within xarray (see `here - `_). 
If and when that - happens, the ``Units`` class will likely be deprecated and replaced + `_). If that happens, + the :py:class:`Units` class will likely be deprecated and replaced with the upstream version. Utilities diff --git a/docs/using-aospy.rst b/docs/using-aospy.rst index 7056941..36c8592 100644 --- a/docs/using-aospy.rst +++ b/docs/using-aospy.rst @@ -169,8 +169,8 @@ element. E.g. ``models = ['name-of-my-model']``. The main script is the recommended way to perform calculations. Nevertheless, it's possible to submit calculations by other means, - such as by explicitly creating ``Calc`` objects and calling their - ``compute`` method. + such as by explicitly creating :py:class:`aospy.Calc` objects and + calling their ``compute`` method. Running the main script ----------------------- @@ -233,21 +233,21 @@ and in the directory structure within which they are saved. - File name : ``varname.intvl_out.dtype_out_time.'from_'intvl_in'_'dtype_in_time.model.run.date_range.nc`` -See the API reference documentation of ``CalcInterface`` for -explanation of each of these components of the path and file name. +See the API reference documentation of :py:class:`aospy.CalcInterface` +for explanation of each of these components of the path and file name. Under the hood -------------- The main script encodes each permutation of the input parameters into -a ``CalcInterface`` object. This object, in turn, is used to -instantiate a ``Calc`` object. The ``Calc`` object, in turn, performs -the calculation. +a :py:class:`CalcInterface` object. This object, in turn, is used to +instantiate a :py:class:`Calc` object. The :py:class:`Calc` object, +in turn, performs the calculation. Unlike :py:class:`aospy.Proj`, :py:class:`aospy.Model`, -:py:class:`aospy.Run`, :py:class:`aospy.Var`, and ``Region``, these -objects are not intended to be saved in ``.py`` files for continual -re-use. Instead, they are generated as needed, perform their desired -tasks, and then go away. 
+:py:class:`aospy.Run`, :py:class:`aospy.Var`, and +:py:class:`aospy.Region`, these objects are not intended to be saved +in ``.py`` files for continual re-use. Instead, they are generated as +needed, perform their desired tasks, and then go away. -See the :ref:`API refeerence ` documentation for further details. +See the :ref:`API reference ` documentation for further details. From cbf1559320cd1f0f7c07760332e01f575ad308dc Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 15:02:40 -0700 Subject: [PATCH 7/9] Update what's new --- docs/whats-new.rst | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/docs/whats-new.rst b/docs/whats-new.rst index ad7ffc7..50e551b 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -9,47 +9,54 @@ v0.1.2 (XX March 2017) ---------------------- This release improves the process of submitting multiple calculations -for automatic execution. the user interface, documentation, internal +for automatic execution. The user interface, documentation, internal logic, and packaging all received upgrades and/or bugfixes. Enhancements ~~~~~~~~~~~~ +- Improve Examples page of the documentation by using the newly + improved example main script and object libraries (:pull:`164`). By + `Spencer Hill `_. +- Include an example library of aospy objects that works + out-of-the-box with the provided example main script (:pull:`155`). + By `Spencer Clark `_ and `Spencer + Hill `_. - Improve readability/usability of the included example script ``aospy_main.py`` for submitting aospy calculations by moving all - internal logic into new ``automate.py`` module (fixes :issue:`152` - via :pull:`155`). By `Spencer Clark - `_ and `Spencer Hill - `_. -- Include an example library of aospy objects that works - out-of-the-box with the provided example main script (fixes - :issue:`151` via :pull:`155`). By `Spencer Clark - `_ and `Spencer Hill - `_. 
+ internal logic into new ``automate.py`` module (:pull:`155`). By + `Spencer Clark `_ and `Spencer + Hill `_. - Enable user to specify whether or not to write output to .tar files (in addition to the standard output). Also document an error that occurs when writing output to .tar files for sufficiently old versions of tar (including the version that ships standard on MacOS), and print a warning when errors are caught during the 'tar' - call (fixes one-half of :issue:`157` via :pull:`160`). By `Spencer Hill + call (:pull:`160`). By `Spencer Hill `_. Bug fixes ~~~~~~~~~ +- Update packaging specifications such that the example main script + and tutorial notebook actually ship with aospy as intended (fixes + :issue:`149` via :pull:`161`). By `Spencer Hill + `_. - Use the 'scipy' engine for the `xarray.DataArray.to_netcdf `_ call when writing aospy calculation outputs to disk to prevent a bug when trying to re-write to an existing netCDF file (fixes one-half - of :issue:`157` via :pull:`160`). + of :issue:`157` via :pull:`160`). By `Spencer Hill + `_. .. _whats-new.0.1.1: v0.1.1 (2 March 2017) --------------------- -This release includes fixes for a number of bugs mistakenly introduced in the -refactoring of the variable loading step of ``calc.py`` (:pull:`90`), as well as -support for xarray version 0.9.1. + +This release includes fixes for a number of bugs mistakenly introduced +in the refactoring of the variable loading step of ``calc.py`` +(:pull:`90`), as well as support for xarray version 0.9.1. 
Enhancements ~~~~~~~~~~~~ From 1a1015beced58cc32371a5804556ee448571ae5b Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 17:55:48 -0700 Subject: [PATCH 8/9] DOC Semi-major updates to 'Using aospy'; other more minor doc edits --- aospy/automate.py | 18 ++-- docs/api.rst | 11 ++- docs/index.rst | 21 +++-- docs/install.rst | 4 +- docs/overview.rst | 3 +- docs/using-aospy.rst | 206 ++++++++++++++++++++++--------------------- 6 files changed, 135 insertions(+), 128 deletions(-) diff --git a/aospy/automate.py b/aospy/automate.py index 8b4ff17..d202eab 100644 --- a/aospy/automate.py +++ b/aospy/automate.py @@ -401,19 +401,21 @@ def submit_mult_calcs(calc_suite_specs, exec_options=None): Returns ------- - A list of the return values for each :py:meth:`aospy.Calc.compute` call - that was made. If a calculation ran without error, this value is the - :py:class`aospy.Calc` object itself, with the results of its calculations - saved in its ``data_out`` attribute. ``data_out`` is a dictionary, with - the keys being the temporal-regional reduction identifiers (e.g. 'reg.av'), - and the values being the corresponding result. + A list of the return values from each :py:meth:`aospy.Calc.compute` call. + If a calculation ran without error, this value is the + :py:class:`aospy.Calc` object itself, with the results of its + calculations saved in its ``data_out`` attribute. ``data_out`` is a + dictionary, with the keys being the temporal-regional reduction + identifiers (e.g. 'reg.av'), and the values being the corresponding + result. If any error occurred during a calculation, the return value is None. Raises ------ - AospyException : if the ``prompt_verify`` option is set to True and the - user does not respond affirmatively to the prompt. + AospyException + If the ``prompt_verify`` option is set to True and the user does not + respond affirmatively to the prompt.
""" if exec_options is None: diff --git a/docs/api.rst b/docs/api.rst index e8e0072..9d7b632 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -193,10 +193,13 @@ is also defined. In contrast, there are no pre-defined objects they wish to use (e.g. to populate the :py:class:`units` attribute of their :py:class:`Var` objects). -Similarly, whereas these baked-in :py:class:`Constant` objects are -used by aospy in various places, aospy currently does not actually use -the ``Var.units`` attribute or the :py:class:`Units` class more -generally; they are for the user's own informational purposes. +.. warning:: + + Whereas these baked-in :py:class:`Constant` objects are used by + aospy in various places, aospy currently does not actually use the + ``Var.units`` attribute during calculations or the + :py:class:`Units` class more generally; they are solely for the + user's own informational purposes. constants --------- diff --git a/docs/index.rst b/docs/index.rst index 96d9df1..ea7b441 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,14 +21,12 @@ computations that use gridded climate and weather data (namely data stored as netCDF files) and the management of the results of those computations. -After you use aospy's built-in tools to describe where to find your -data, use aospy's "main" script whenever you want to fire off -calculations to be performed in parallel using the permutation of an -arbitrary number of climate models, simulations, variables to be -computed, date ranges, sub-annual-sampling, and many other parameters. -In other words, it is possible using aospy to submit and execute *all* -calculations for a particular project (e.g. paper, class project, or -thesis chapter) with a single command! +aospy enables firing off multiple calculations in parallel using the +permutation of an arbitrary number of climate models, simulations, +variables to be computed, date ranges, sub-annual-sampling, and many +other parameters. 
In other words, it is possible using aospy to +submit and execute *all* calculations for a particular project +(e.g. paper, class project, or thesis chapter) with a single command! The results get saved in a highly organized directory tree as netCDF files, making it easy to subsequently find and use the data (e.g. for @@ -39,8 +37,7 @@ computed. The eventual goal is for aospy to become the community standard for gridded climate data analysis and, in so doing, accelerate progress in climate science and make the results of climate research more easily -reproducible and shareable. aospy relies heavily on the `xarray -`_ package. +reproducible and shareable. Documentation ============= @@ -64,6 +61,8 @@ See also at the Seventh Symposium on Advances in Modeling and Analysis Using Python, recorded 2017 January 24 as part of the 2017 American Meteorological Society Annual Meeting. +- The `xarray `_ package, upon which aospy + relies heavily. Get in touch ============ @@ -71,7 +70,7 @@ Get in touch - Troubleshooting: We are actively seeking new users and are eager to help you get started with aospy! Usage questions, bug reports, and any other correspondence are all welcome and best placed as `Issues - `_ on our Github repo + `_ on our `Github repo `_. - Contributing: We are also actively seeking new developers! Please get in touch by opening an Issue or submitting a Pull Request. diff --git a/docs/install.rst b/docs/install.rst index a0cfe8b..b110aff 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -35,8 +35,8 @@ The recommended installation method is via `conda of packages on Python 3.4. Please use one of the alternative installation methods described below. 
-Alternative method #1: via ``pip`` ----------------------------------- +Alternative method #1: pip +-------------------------- aospy is available from the official `Python Packaging Index (PyPI) `_ via ``pip``:: diff --git a/docs/overview.rst b/docs/overview.rst index 73cb8da..ebaee40 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -21,8 +21,7 @@ lines, then aospy may be a great tool for you: averaged over these 10 regions. That's impractical -- I'll just do a small subset." -With aospy, you can easily resolve these issues and thereby accelerate -your research. +Each of these common problems is easily solved by using aospy. .. _design-philosophy: diff --git a/docs/using-aospy.rst b/docs/using-aospy.rst index 36c8592..c76f6ae 100644 --- a/docs/using-aospy.rst +++ b/docs/using-aospy.rst @@ -6,8 +6,8 @@ Using aospy This section provides a high-level summary of how to use aospy. See the :ref:`Overview ` section of this documentation for more -background information, or the :ref:`Examples` section and associated -Jupyter Notebook for concrete examples. +background information, or the :ref:`Examples` section for concrete +examples. Your aospy object library ========================= @@ -25,13 +25,13 @@ runs (i.e. simulations). This involves a hierarchy of three classes, :py:class:`aospy.Proj`, :py:class:`aospy.Model`, and :py:class:`aospy.Run`. -1. :py:class:`aospy.Proj`: This represents a single project that +1. :py:class:`aospy.Proj`: This represents a single project that involves analysis of data from one or more models and simulations. -2. :py:class:`aospy.Model`: This represents a single climate model, +2. :py:class:`aospy.Model`: This represents a single climate model, other numerical model, observational data source, etc. -3. :py:class:`aospy.Run`: This represents a single simulation, +3. :py:class:`aospy.Run`: This represents a single simulation, version of observational data, etc. 
So each user's object library will contain one or more @@ -67,55 +67,50 @@ they are more generic than the objects of the :py:class:`aospy.Proj` - they correspond to the generic physical quantities/regions rather than the data of a particular project, model, or simulation. -Configuring your object library -=============================== - -Required components -------------------- - -In order for your object library to work with the main script, it must -include the following two objects: - -1. ``projs`` : A container of :py:class:`aospy.Proj` objects -2. ``variables`` : A container of :py:class:`aospy.Var` objects - -(The :py:class:`aospy.Model`, :py:class:`aospy.Run`, and -:py:class:`aospy.Region` objects are all included within their parent -:py:class:`aospy.Proj` objects and thus don't require analogous -top-level containers.) - -These must be accessible from the object library's toplevel namespace, -i.e. the Python commands ``import my_obj_lib.projs`` and ``import -my_obj_lib.variables`` must work, where ``my_obj_lib`` is the name -you've given to your library. Which leads to the next topic: how to -structure your object library within one or more ``.py`` files. - -File/directory structure +Object library structure ------------------------ -The simplest way to structure your object library is to define -everything in a single module (i.e. a single ``.py`` file). This -works great for small projects and for initially trying out aospy. - -As an object library grows, however, it can become desirable to split -it into multiple ``.py`` files. This effectively changes it from a -module to a proper Python package. Python packages require a specific -directory structure and specification of things to include at each -level via ``__init__.py`` files. See the `official documentation +The officially supported way to submit calculations is the +:py:meth:`aospy.submit_mult_calcs` function. 
In order for this to +work, your object library must follow one or the other of these +structures: + +1. All :py:class:`aospy.Proj` and :py:class:`aospy.Var` objects are + accessible as attributes of your library. This means that + ``my_obj_lib.my_obj`` works, where ``my_obj_lib`` is + your object library, and ``my_obj`` is the object in question. +2. All :py:class:`aospy.Proj` objects are stored in a container called + ``projs``, where ``projs`` is an attribute of your library + (i.e. ``my_obj_lib.projs``). And likewise for + :py:class:`aospy.Var` objects in a ``variables`` attribute. + +Beyond that, you can structure your object library however you wish. +In particular, it can be structured as a Python module (i.e. a single +".py" file) or as a package (i.e. multiple ".py" files linked +together; see the `official documentation `_ on -packages for further guidance. - -For an example of a large object library that is structured as a -proper package, see `here +package structuring). + +A single module works great for small projects and for initially +trying out aospy (this is how the example object library, +:py:mod:`aospy.examples.example_obj_lib`, is structured). But as +your object library grows, it can become easier to manage as a package +of multiple files. For an example of a large object library that is +structured as a formal package, see `here `_. -Making your object library visible to Python --------------------------------------------- +Accessing your library +---------------------- + +If your current working directory is the one containing your library, +you can import your library via ``import my_obj_lib`` (replacing +``my_obj_lib`` with whatever you've named yours) in order to pass it +to :py:meth:`aospy.submit_mult_calcs`. 
-Whether it is structured as a single module or as a proper package, -you'll likely have to add the directory containing your object library -to the ``PYTHONPATH`` environment variable in order for Python to be -able to import it:: +Once you start using aospy a lot, however, this requirement of being +in the same directory becomes cumbersome. As a solution, you can add +the directory containing your object library to the ``PYTHONPATH`` +environment variable. E.g. if you're using the bash shell: :: export PYTHONPATH=/path/to/your/object/library:${PYTHONPATH} @@ -123,7 +118,8 @@ Of course, replace ``/path/to/your/object/library`` with the actual path to yours. This command places your object library at the front of the ``PYTHONPATH`` environment variable, which is essentially the first place where Python looks to find packages and modules to be -imported. +imported. (For more, see Python's `official documentation on +PYTHONPATH `_). .. note:: @@ -131,53 +127,30 @@ imported. for the bash shell on Linux or Mac, ``~/.bash_profile``) so that you don't have to call it again in every new terminal session. -.. note:: - - For object libraries structured as packages, it is also possible to - properly install your object library by creating a properly set-up - ``setup.py`` file and ``python setup.py install``. But unless - you're prevented from modifying ``PYTHONPATH`` for some reason, - there's no advantage of this versus the simpler - ``PYTHONPATH`` alternative above. - -Once this has been done, you should be able to import your object -library from within Python via ``import my_obj_lib``, where -``my_obj_lib`` is the name you've given to your library. You will not -be able to use the main script until this works. +To test that this is working, run ``python -c "import my_obj_lib"`` from a +directory other than where the library is located (again replacing +``my_obj_lib`` with the name you've given to your library). If this +runs without error, you should be good to go.
Executing calculations ====================== -The main script contents ------------------------- - -Calculations are performed by specifying in a "main script" the -desired parameters and then running the script. +As noted above, the officially supported way to submit calculations is the +:py:meth:`aospy.submit_mult_calcs` function. -We provide a template main script within aospy. You should copy it to -the location of your choice and in the copy replace the given names -with the names of your own project, model, etc. objects that you want -to perform computations on. (If you accidentally change the original, -you can always get a `fresh copy from Github -`_.) - -Except where noted otherwise in the template script's comments, all -parameters should be submitted as lists, even if they are a single -element. E.g. ``models = ['name-of-my-model']``. - -.. note:: - - The main script is the recommended way to perform calculations. - Nevertheless, it's possible to submit calculations by other means, - such as by explicitly creating :py:class:`aospy.Calc` objects and - calling their ``compute`` method. +We provide a template "main" script with aospy that uses this +function. We recommend copying it to the location of your choice. In +the copy, replace the example object library and associated objects +with your own. (If you accidentally change the original, you can +always get a `fresh copy from Github +`_). Running the main script ----------------------- Once the main script parameters are all modified as desired, execute the script from the command line as follows :: - /path/to/your/main.py + /path/to/your/aospy_main.py This should generate a text summary of the specified parameters and a prompt as to whether to proceed or not with the calculations. An @@ -196,6 +169,9 @@ the number of permutations. line, run the script and then start an interactive IPython session via ``ipython -i /path/to/your/main.py``. 
+ Or you can call :py:func:`aospy.submit_mult_calcs` directly within + an interactive session. + As the calculations are performed, logging information will be printed to the terminal displaying their progress. @@ -205,7 +181,8 @@ Parallelized calculations The calculations generated by the main script can be executed in parallel provided the optional dependency ``multiprocess`` is installed. (It is available via pip: ``pip install multiprocess``.) -Otherwise, or if the user sets ``parallelize`` to ``False`` in the main +Otherwise, or if the user sets ``parallelize=False`` in the +``calc_exec_options`` argument of :py:func:`aospy.submit_mult_calcs` in the main script, the calculations will be executed one-by-one. Particularly on instititutional clusters with many cores, this @@ -220,7 +197,6 @@ calculations are generated. to follow. Work is ongoing to improve the logging output when the computations are parallelized. - Finding the output ------------------ @@ -233,21 +209,49 @@ and in the directory structure within which they are saved. - File name : ``varname.intvl_out.dtype_out_time.'from_'intvl_in'_'dtype_in_time.model.run.date_range.nc`` -See the API reference documentation of :py:class:`aospy.CalcInterface` -for explanation of each of these components of the path and file name. +See the :ref:`api-ref` on :py:class:`aospy.CalcInterface` for +explanation of each of these components of the path and file name. Under the hood --------------- - -The main script encodes each permutation of the input parameters into -a :py:class:`CalcInterface` object. This object, in turn, is used to -instantiate a :py:class:`Calc` object. The :py:class:`Calc` object, -in turn, performs the calculation. - -Unlike :py:class:`aospy.Proj`, :py:class:`aospy.Model`, -:py:class:`aospy.Run`, :py:class:`aospy.Var`, and -:py:class:`aospy.Region`, these objects are not intended to be saved -in ``.py`` files for continual re-use.
Instead, they are generated as -needed, perform their desired tasks, and then go away. +============== + +:py:func:`aospy.submit_mult_calcs` creates a :py:class:`aospy.CalcSuite` +object that permutes over the provided lists of calculation +specifications, encoding each permutations into +:py:class:`aospy.CalcInterface` objects. + +.. note:: + + Actually, when multiple regions and/or output time/regional + reductions are specified, these all get passed to each + :py:class:`aospy.CalcInterface` object rather than being permuted + over. They are then looped over during the subsequent + calculations. This is to prevent unnecessary re-loading and + re-computing, because, for a given simulation/variable/etc., all + regions and reduction methods use the same data. + +Each :py:class:`aospy.CalcInterface` object, in turn, is used to +instantiate a :py:class:`aospy.Calc` object. The +:py:class:`aospy.Calc` object, in turn: + +- loads the required netCDF data given its simulation, variable, and date range +- (if necessary) further truncates the data in time (i.e. to the given + subset of the annual cycle, and/or if the requested date range + doesn't exactly align with the time chunking of the input netCDF + files) +- (if the variable is a function of other variables) executes the + function that computes the calculation using this loaded and + truncated data +- applies all specified temporal and regional time reductions +- writes the results (plus additional metadata) to disk as netCDF + files and appends it to its own ``data_out`` attribute + +.. note:: + + Unlike :py:class:`aospy.Proj`, :py:class:`aospy.Model`, + :py:class:`aospy.Run`, :py:class:`aospy.Var`, and + :py:class:`aospy.Region`, these objects are not intended to be + saved in ``.py`` files for continual re-use. Instead, they are + generated as needed, perform their desired tasks, and then go away. See the :ref:`API reference ` documentation for further details. 
From 95c31406476a9d567301ace27bc8d279dca8b171 Mon Sep 17 00:00:00 2001 From: Spencer Hill Date: Wed, 29 Mar 2017 20:08:59 -0700 Subject: [PATCH 9/9] fix typo --- docs/using-aospy.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/using-aospy.rst b/docs/using-aospy.rst index c76f6ae..575a49c 100644 --- a/docs/using-aospy.rst +++ b/docs/using-aospy.rst @@ -217,8 +217,8 @@ Under the hood :py:func:`aospy.submit_mult_calcs` creates a :py:class:`aospy.CalcSuite` object that permutes over the provided lists of calculation -specifications, encoding each permutations into -:py:class:`aospy.CalcInterface` objects. +specifications, encoding each permutation into a +:py:class:`aospy.CalcInterface` object. .. note::