From 50c2b8951b29a3c883a778becbf8582f9519eb48 Mon Sep 17 00:00:00 2001
From: Anil Kumar <108816337+AnilKumar-NOAA@users.noreply.github.com>
Date: Tue, 28 May 2024 13:23:53 -0400
Subject: [PATCH] Global-workflow (AR) Generic updates for Gaea C5 (#2515)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Port global-workflow’s build and run capability to Gaea-C5
- Building global-workflow on Gaea-C5
- Setting up experiments with global-workflow on Gaea-C5

---------

Co-authored-by: AnilKumar-NOAA
Co-authored-by: DavidBurrows-NCO <82525974+DavidBurrows-NCO@users.noreply.github.com>
---
Review notes (free-form text placed after the three-dash separator; it is
commentary only and is not part of the commit message):

 * env/GAEA.env: the original line
       ppn="npe_node_${step}_gfs" || ppn="npe_node_${step}"
   could never take its fallback, because a plain assignment always returns 0.
   The hunk now tests the indirectly referenced variable and falls back to the
   generic per-node variable when the gfs-specific one is unset or empty.
   The hunk header, the diffstat row and the insertion total were updated
   accordingly (39 -> 42 lines, 148 -> 151 insertions). The blob id on the
   env/GAEA.env "index" line predates this change.
 * Line breaks were restored from a whitespace-collapsed copy of this patch.
   Indentation of added and context lines follows common shell/Lua/Python
   conventions and should be confirmed against the target tree before applying.
 * What the patch does, file by file:
     env/GAEA.env                         runtime settings for the "fcst" and
                                          "atmos_products" steps (srun launcher,
                                          APRUN_UFS, USE_CFP)
     modulefiles/module_base.gaea.lua     run-time module stack, versions taken
                                          from *_ver environment variables
     modulefiles/module_gwsetup.gaea.lua  modules for the workflow setup scripts
     parm/config/gfs/config.aero          GAEA value for AERO_INPUTS_DIR
     parm/config/gfs/config.resources     GAEA value for npe_node_max (128)
     sorc/link_workflow.sh                GAEA value for FIX_DIR
     ush/detect_machine.sh                match gaea51-58 instead of gaea9-16
     ush/module-setup.sh                  replace the old GAEA module bootstrap
                                          with lmod init + "module reset"
     versions/{build,run}.gaea.ver        compiler/MPI versions and spack path
     workflow/hosts.py                    add GAEA to SUPPORTED_HOSTS and detect
                                          it via the /gpfs/f5 path
     workflow/hosts/gaea.yaml             host paths, scheduler and queue values

 env/GAEA.env                        | 42 +++++++++++++++++++++++++++++++
 modulefiles/module_base.gaea.lua    | 39 +++++++++++++++++++++++++++++
 modulefiles/module_gwsetup.gaea.lua | 21 ++++++++++++++++
 parm/config/gfs/config.aero         |  3 +++
 parm/config/gfs/config.resources    |  1 +
 sorc/link_workflow.sh               |  1 +
 ush/detect_machine.sh               |  6 ++---
 ush/module-setup.sh                 | 33 ++----------------------
 versions/build.gaea.ver             |  6 +++++
 versions/run.gaea.ver               |  6 +++++
 workflow/hosts.py                   |  4 ++-
 workflow/hosts/gaea.yaml            | 25 ++++++++++++++++++
 12 files changed, 151 insertions(+), 36 deletions(-)
 create mode 100755 env/GAEA.env
 create mode 100644 modulefiles/module_base.gaea.lua
 create mode 100644 modulefiles/module_gwsetup.gaea.lua
 create mode 100644 versions/build.gaea.ver
 create mode 100644 versions/run.gaea.ver
 create mode 100644 workflow/hosts/gaea.yaml

diff --git a/env/GAEA.env b/env/GAEA.env
new file mode 100755
index 0000000000..c19fecc934
--- /dev/null
+++ b/env/GAEA.env
@@ -0,0 +1,42 @@
+#! /usr/bin/env bash
+
+if [[ $# -ne 1 ]]; then
+
+    echo "Must specify an input argument to set runtime environment variables!"
+    echo "argument can be any one of the following:"
+    echo "fcst atmos_products"
+    exit 1
+
+fi
+
+step=$1
+
+export launcher="srun -l --export=ALL"
+export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
+
+ulimit -s unlimited
+ulimit -a
+
+if [[ "${step}" = "fcst" ]]; then
+
+    if [[ "${CDUMP}" =~ "gfs" ]]; then
+        nprocs="npe_${step}_gfs"
+        ppn="npe_node_${step}_gfs"
+        # An assignment always succeeds, so "ppn=... || ppn=..." never fell back.
+        # Use the generic per-node variable when the gfs-specific one is unset/empty.
+        [[ -n "${!ppn:-}" ]] || ppn="npe_node_${step}"
+    else
+        nprocs="npe_${step}"
+        ppn="npe_node_${step}"
+    fi
+    (( nnodes = (${!nprocs}+${!ppn}-1)/${!ppn} ))
+    (( ntasks = nnodes*${!ppn} ))
+    # With ESMF threading, the model wants to use the full node
+    export APRUN_UFS="${launcher} -n ${ntasks}"
+    unset nprocs ppn nnodes ntasks
+
+elif [[ "${step}" = "atmos_products" ]]; then
+
+    export USE_CFP="YES" # Use MPMD for downstream product generation
+
+fi
diff --git a/modulefiles/module_base.gaea.lua b/modulefiles/module_base.gaea.lua
new file mode 100644
index 0000000000..55ad6b0c34
--- /dev/null
+++ b/modulefiles/module_base.gaea.lua
@@ -0,0 +1,39 @@
+help([[
+Load environment to run GFS on Gaea
+]])
+
+local spack_mod_path=(os.getenv("spack_mod_path") or "None")
+prepend_path("MODULEPATH", spack_mod_path)
+
+load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None")))
+load(pathJoin("stack-cray-mpich", (os.getenv("stack_cray_mpich_ver") or "None")))
+load(pathJoin("python", (os.getenv("python_ver") or "None")))
+
+load(pathJoin("jasper", (os.getenv("jasper_ver") or "None")))
+load(pathJoin("libpng", (os.getenv("libpng_ver") or "None")))
+load(pathJoin("cdo", (os.getenv("cdo_ver") or "None")))
+load(pathJoin("hdf5", (os.getenv("hdf5_ver") or "None")))
+load(pathJoin("netcdf-c", (os.getenv("netcdf_c_ver") or "None")))
+load(pathJoin("netcdf-fortran", (os.getenv("netcdf_fortran_ver") or "None")))
+
+load(pathJoin("nco", (os.getenv("nco_ver") or "None")))
+load(pathJoin("prod_util", (os.getenv("prod_util_ver") or "None")))
+load(pathJoin("grib-util", (os.getenv("grib_util_ver") or "None")))
+load(pathJoin("g2tmpl", (os.getenv("g2tmpl_ver") or "None")))
+load(pathJoin("gsi-ncdiag", (os.getenv("gsi_ncdiag_ver") or "None")))
+load(pathJoin("crtm", (os.getenv("crtm_ver") or "None")))
+load(pathJoin("bufr", (os.getenv("bufr_ver") or "None")))
+load(pathJoin("wgrib2", (os.getenv("wgrib2_ver") or "None")))
+load(pathJoin("py-netcdf4", (os.getenv("py_netcdf4_ver") or "None")))
+load(pathJoin("py-pyyaml", (os.getenv("py_pyyaml_ver") or "None")))
+load(pathJoin("py-jinja2", (os.getenv("py_jinja2_ver") or "None")))
+load(pathJoin("py-pandas", (os.getenv("py_pandas_ver") or "None")))
+load(pathJoin("py-python-dateutil", (os.getenv("py_python_dateutil_ver") or "None")))
+load(pathJoin("met", (os.getenv("met_ver") or "None")))
+load(pathJoin("metplus", (os.getenv("metplus_ver") or "None")))
+load(pathJoin("py-xarray", (os.getenv("py_xarray_ver") or "None")))
+
+setenv("WGRIB2","wgrib2")
+setenv("UTILROOT",(os.getenv("prod_util_ROOT") or "None"))
+
+whatis("Description: GFS run setup environment")
diff --git a/modulefiles/module_gwsetup.gaea.lua b/modulefiles/module_gwsetup.gaea.lua
new file mode 100644
index 0000000000..5a8b2379a9
--- /dev/null
+++ b/modulefiles/module_gwsetup.gaea.lua
@@ -0,0 +1,21 @@
+help([[
+Load environment to run GFS workflow setup scripts on Gaea
+]])
+
+prepend_path("MODULEPATH", "/ncrc/proj/epic/rocoto/modulefiles")
+load(pathJoin("rocoto"))
+
+prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core")
+
+local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0"
+local python_ver=os.getenv("python_ver") or "3.10.13"
+
+load(pathJoin("stack-intel", stack_intel_ver))
+load(pathJoin("python", python_ver))
+load("py-jinja2")
+load("py-pyyaml")
+load("py-numpy")
+local git_ver=os.getenv("git_ver") or "2.35.2"
+load(pathJoin("git", git_ver))
+
+whatis("Description: GFS run setup environment")
diff --git a/parm/config/gfs/config.aero b/parm/config/gfs/config.aero
index c152fafd12..2fae019574 100644
--- a/parm/config/gfs/config.aero
+++ b/parm/config/gfs/config.aero
@@ -20,6 +20,9 @@ case ${machine} in
   "WCOSS2")
     AERO_INPUTS_DIR="/lfs/h2/emc/global/noscrub/emc.global/data/gocart_emissions"
     ;;
+  "GAEA")
+    AERO_INPUTS_DIR="/gpfs/f5/epic/proj-shared/global/glopara/data/gocart_emissions"
+    ;;
   "JET")
     AERO_INPUTS_DIR="/lfs4/HFIP/hfv3gfs/glopara/data/gocart_emissions"
     ;;
diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources
index d58ecf85b2..c583e0c04e 100644
--- a/parm/config/gfs/config.resources
+++ b/parm/config/gfs/config.resources
@@ -35,6 +35,7 @@ echo "BEGIN: config.resources"
 case ${machine} in
   "WCOSS2") npe_node_max=128;;
   "HERA") npe_node_max=40;;
+  "GAEA") npe_node_max=128;;
   "ORION") npe_node_max=40;;
   "HERCULES") npe_node_max=80;;
   "JET")
diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh
index c5d7243e8f..68873d0f1a 100755
--- a/sorc/link_workflow.sh
+++ b/sorc/link_workflow.sh
@@ -75,6 +75,7 @@ case "${machine}" in
   "hercules") FIX_DIR="/work/noaa/global/glopara/fix" ;;
   "jet") FIX_DIR="/lfs4/HFIP/hfv3gfs/glopara/git/fv3gfs/fix" ;;
   "s4") FIX_DIR="/data/prod/glopara/fix" ;;
+  "gaea") FIX_DIR="/gpfs/f5/epic/proj-shared/global/glopara/data/fix" ;;
   *)
     echo "FATAL: Unknown target machine ${machine}, couldn't set FIX_DIR"
     exit 1
diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh
index 8a719c10d9..683ee0db7f 100755
--- a/ush/detect_machine.sh
+++ b/ush/detect_machine.sh
@@ -21,10 +21,8 @@ case $(hostname -f) in
   dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9
   dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10

-  gaea9) MACHINE_ID=gaea ;; ### gaea9
-  gaea1[0-6]) MACHINE_ID=gaea ;; ### gaea10-16
-  gaea9.ncrc.gov) MACHINE_ID=gaea ;; ### gaea9
-  gaea1[0-6].ncrc.gov) MACHINE_ID=gaea ;; ### gaea10-16
+  gaea5[1-8]) MACHINE_ID=gaea ;; ### gaea51-58
+  gaea5[1-8].ncrc.gov) MACHINE_ID=gaea ;; ### gaea51-58

   hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09
   hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12
diff --git a/ush/module-setup.sh b/ush/module-setup.sh
index b66e3622d0..b4ec3edafa 100755
--- a/ush/module-setup.sh
+++ b/ush/module-setup.sh
@@ -70,39 +70,10 @@ elif [[ ${MACHINE_ID} = stampede* ]] ; then
 elif [[ ${MACHINE_ID} = gaea* ]] ; then
     # We are on GAEA.
     if ( ! eval module help > /dev/null 2>&1 ) ; then
-        # We cannot simply load the module command. The GAEA
-        # /etc/profile modifies a number of module-related variables
-        # before loading the module command. Without those variables,
-        # the module command fails. Hence we actually have to source
-        # /etc/profile here.
-        source /etc/profile
-        __ms_source_etc_profile=yes
-    else
-        __ms_source_etc_profile=no
-    fi
-    module purge
-    # clean up after purge
-    unset _LMFILES_
-    unset _LMFILES_000
-    unset _LMFILES_001
-    unset LOADEDMODULES
-    module load modules
-    if [[ -d /opt/cray/ari/modulefiles ]] ; then
-        module use -a /opt/cray/ari/modulefiles
-    fi
-    if [[ -d /opt/cray/pe/ari/modulefiles ]] ; then
-        module use -a /opt/cray/pe/ari/modulefiles
-    fi
-    if [[ -d /opt/cray/pe/craype/default/modulefiles ]] ; then
-        module use -a /opt/cray/pe/craype/default/modulefiles
-    fi
-    if [[ -s /etc/opt/cray/pe/admin-pe/site-config ]] ; then
-        source /etc/opt/cray/pe/admin-pe/site-config
-    fi
-    if [[ "${__ms_source_etc_profile}" == yes ]] ; then
+        source /usr/share/lmod/lmod/init/bash
         source /etc/profile
-        unset __ms_source_etc_profile
     fi
+    module reset

 elif [[ ${MACHINE_ID} = expanse* ]]; then
     # We are on SDSC Expanse
diff --git a/versions/build.gaea.ver b/versions/build.gaea.ver
new file mode 100644
index 0000000000..b92fe8c1db
--- /dev/null
+++ b/versions/build.gaea.ver
@@ -0,0 +1,6 @@
+export stack_intel_ver=2023.1.0
+export stack_cray_mpich_ver=8.1.25
+export spack_env=gsi-addon-dev
+
+source "${HOMEgfs:-}/versions/run.spack.ver"
+export spack_mod_path="/ncrc/proj/epic/spack-stack/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core"
diff --git a/versions/run.gaea.ver b/versions/run.gaea.ver
new file mode 100644
index 0000000000..b92fe8c1db
--- /dev/null
+++ b/versions/run.gaea.ver
@@ -0,0 +1,6 @@
+export stack_intel_ver=2023.1.0
+export stack_cray_mpich_ver=8.1.25
+export spack_env=gsi-addon-dev
+
+source "${HOMEgfs:-}/versions/run.spack.ver"
+export spack_mod_path="/ncrc/proj/epic/spack-stack/spack-stack-${spack_stack_ver}/envs/${spack_env}/install/modulefiles/Core"
diff --git a/workflow/hosts.py b/workflow/hosts.py
index a17cd3f4a8..2334a3ac35 100644
--- a/workflow/hosts.py
+++ b/workflow/hosts.py
@@ -15,7 +15,7 @@ class Host:
     """

     SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES',
-                       'WCOSS2', 'S4', 'CONTAINER', 'AWSPW']
+                       'WCOSS2', 'S4', 'CONTAINER', 'AWSPW', 'GAEA']

     def __init__(self, host=None):

@@ -49,6 +49,8 @@ def detect(cls):
             machine = 'WCOSS2'
         elif os.path.exists('/data/prod'):
             machine = 'S4'
+        elif os.path.exists('/gpfs/f5'):
+            machine = 'GAEA'
         elif container is not None:
             machine = 'CONTAINER'
         elif pw_csp is not None:
diff --git a/workflow/hosts/gaea.yaml b/workflow/hosts/gaea.yaml
new file mode 100644
index 0000000000..7ca8420997
--- /dev/null
+++ b/workflow/hosts/gaea.yaml
@@ -0,0 +1,25 @@
+BASE_GIT: '/gpfs/f5/epic/proj-shared/global/glopara/data/git'
+DMPDIR: '/gpfs/f5/epic/proj-shared/global/glopara/data/dump'
+BASE_CPLIC: '/gpfs/f5/epic/proj-shared/global/glopara/data/ICSDIR/prototype_ICs'
+PACKAGEROOT: '/gpfs/f5/epic/proj-shared/global/glopara/data/nwpara'
+COMROOT: '/gpfs/f5/epic/proj-shared/global/glopara/data/com'
+COMINsyn: '${COMROOT}/gfs/prod/syndat'
+HOMEDIR: '/gpfs/f5/epic/scratch/${USER}'
+STMP: '/gpfs/f5/epic/scratch/${USER}'
+PTMP: '/gpfs/f5/epic/scratch/${USER}'
+NOSCRUB: $HOMEDIR
+ACCOUNT: epic
+SCHEDULER: slurm
+QUEUE: normal
+QUEUE_SERVICE: normal
+PARTITION_BATCH: batch
+PARTITION_SERVICE: batch
+CHGRP_RSTPROD: 'NO'
+CHGRP_CMD: 'chgrp rstprod'
+HPSSARCH: 'NO'
+HPSS_PROJECT: emc-global
+LOCALARCH: 'NO'
+ATARDIR: '${NOSCRUB}/archive_rotdir/${PSLOT}'
+MAKE_NSSTBUFR: 'NO'
+MAKE_ACFTBUFR: 'NO'
+SUPPORTED_RESOLUTIONS: ['C1152', 'C768', 'C384', 'C192', 'C96', 'C48']