From ade1486b60473e265f9ed46c41d2b4185448d5f3 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 3 Feb 2023 11:28:13 -0800 Subject: [PATCH] Docs: Lonestar6 GPUs (TACC) Add a documentation on how to run on Lonestar6 GPUs (TACC). --- Docs/source/install/hpc.rst | 1 + Docs/source/install/hpc/lonestar6.rst | 89 ++++++++++ .../install_a100_dependencies.sh | 168 ++++++++++++++++++ .../lonestar6-tacc/lonestar6_a100.sbatch | 41 +++++ .../lonestar6_warpx_a100.profile.example | 56 ++++++ 5 files changed, 355 insertions(+) create mode 100644 Docs/source/install/hpc/lonestar6.rst create mode 100755 Tools/machines/lonestar6-tacc/install_a100_dependencies.sh create mode 100644 Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch create mode 100644 Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example diff --git a/Docs/source/install/hpc.rst b/Docs/source/install/hpc.rst index 5efdeae39a8..f8baaf511bd 100644 --- a/Docs/source/install/hpc.rst +++ b/Docs/source/install/hpc.rst @@ -42,6 +42,7 @@ This section documents quick-start guides for a selection of supercomputers that hpc/lassen hpc/lawrencium hpc/leonardo + hpc/lonestar6 hpc/lumi hpc/lxplus hpc/ookami diff --git a/Docs/source/install/hpc/lonestar6.rst b/Docs/source/install/hpc/lonestar6.rst new file mode 100644 index 00000000000..b80346959d8 --- /dev/null +++ b/Docs/source/install/hpc/lonestar6.rst @@ -0,0 +1,89 @@ +.. _building-lonestar6: + +Lonestar6 (TACC) +================ + +The `Lonestar6 cluster `_ is located at `TACC `__. 
+ + +Introduction +------------ + +If you are new to this system, **please see the following resources**: + +* `TACC user guide `__ +* Batch system: `Slurm `__ +* `Jupyter service `__ +* `Filesystem directories `__: + + * ``$HOME``: per-user home directory, backed up (10 GB) + * ``$WORK``: per-user production directory, not backed up, not purged, Lustre (1 TB) + * ``$SCRATCH``: per-user production directory, not backed up, purged every 10 days, Lustre (no limits, 8PByte total) + + +Installation +------------ + +Use the following commands to download the WarpX source code and switch to the correct branch: + +.. code-block:: bash + + git clone https://github.com/ECP-WarpX/WarpX.git $WORK/src/warpx + +We use the following modules and environments on the system (``$HOME/lonestar6_warpx_a100.profile``). + +.. literalinclude:: ../../../../Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example + :language: bash + :caption: You can copy this file from ``Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example``. + +We recommend storing the above lines in a file, such as ``$HOME/lonestar6_warpx_a100.profile``, and loading it into your shell after a login: + +.. code-block:: bash + + source $HOME/lonestar6_warpx_a100.profile + +Then, ``cd`` into the directory ``$WORK/src/warpx`` and use the following commands to compile: + +.. code-block:: bash + + cd $WORK/src/warpx + rm -rf build + + cmake -S . -B build -DWarpX_DIMS=3 -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON + cmake --build build -j 16 + +The general :ref:`cmake compile-time options ` apply as usual. + +**That's it!** +A 3D WarpX executable is now in ``build/bin/`` and :ref:`can be run ` with a :ref:`3D example inputs file `. +Most people execute the binary directly or copy it out to a location in ``$WORK`` or ``$SCRATCH``. + + +.. _running-cpp-lonestar6: + +Running +------- + +.. 
_running-cpp-lonestar6-A100-GPUs: + +A100 GPUs (40 GB) +^^^^^^^^^^^^^^^^^ + +`84 GPU nodes, each with 2 A100 GPUs (40 GB) `__. + +The batch script below can be used to run a WarpX simulation on multiple nodes (change ``-N`` accordingly) on the supercomputer Lonestar6 at TACC. +Replace descriptions between chevrons ``<>`` by relevant values, for instance ```` could be ``plasma_mirror_inputs``. +Note that we run one MPI rank per GPU. + + +.. literalinclude:: ../../../../Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch + :language: bash + :caption: You can copy this file from ``Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch``. + +To run a simulation, copy the lines above to a file ``lonestar6_a100.sbatch`` and run + +.. code-block:: bash + + sbatch lonestar6_a100.sbatch + +to submit the job. diff --git a/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh b/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh new file mode 100755 index 00000000000..2e3d2181094 --- /dev/null +++ b/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# +# Copyright 2023 The WarpX Community +# +# This file is part of WarpX. +# +# Author: Axel Huebl +# License: BSD-3-Clause-LBNL + +# Exit on first error encountered ############################################# +# +set -eu -o pipefail + + +# Check: ###################################################################### +# +# Was lonestar6_warpx_a100.profile sourced and configured correctly? +if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lonestar6_warpx_a100.profile file! 
Please edit its line 2 to continue!"; exit 1; fi + + +# Remove old dependencies ##################################################### +# +SW_DIR="${WORK}/sw/lonestar6/sw/lonestar6/a100" +rm -rf ${SW_DIR} +mkdir -p ${SW_DIR} + +# remove common user mistakes in python, located in .local instead of a venv +python3 -m pip uninstall -qq -y pywarpx +python3 -m pip uninstall -qq -y warpx +python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true + + +# General extra dependencies ################################################## +# + +# tmpfs build directory: avoids issues often seen with $HOME and is faster +build_dir=$(mktemp -d) + +# c-blosc (I/O compression) +if [ -d $HOME/src/c-blosc ] +then + cd $HOME/src/c-blosc + git fetch --prune + git checkout v1.21.1 + cd - +else + git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git $HOME/src/c-blosc +fi +rm -rf $HOME/src/c-blosc-a100-build +cmake -S $HOME/src/c-blosc -B ${build_dir}/c-blosc-a100-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1 +cmake --build ${build_dir}/c-blosc-a100-build --target install --parallel 16 +rm -rf ${build_dir}/c-blosc-a100-build + +# ADIOS2 +if [ -d $HOME/src/adios2 ] +then + cd $HOME/src/adios2 + git fetch --prune + git checkout v2.8.3 + cd - +else + git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git $HOME/src/adios2 +fi +rm -rf $HOME/src/adios2-a100-build +cmake -S $HOME/src/adios2 -B ${build_dir}/adios2-a100-build -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3 +cmake --build ${build_dir}/adios2-a100-build --target install -j 16 +rm -rf ${build_dir}/adios2-a100-build + +# BLAS++ (for PSATD+RZ) +if [ -d $HOME/src/blaspp ] +then + cd $HOME/src/blaspp + git fetch --prune + git checkout v2024.05.31 + cd - +else + git clone -b v2024.05.31 https://github.com/icl-utk-edu/blaspp.git $HOME/src/blaspp +fi +rm -rf 
$HOME/src/blaspp-a100-build +CXX=$(which g++) cmake -S $HOME/src/blaspp -B ${build_dir}/blaspp-a100-build -Duse_openmp=OFF -Dgpu_backend=cuda -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-2024.05.31 +cmake --build ${build_dir}/blaspp-a100-build --target install --parallel 16 +rm -rf ${build_dir}/blaspp-a100-build + +# LAPACK++ (for PSATD+RZ) +if [ -d $HOME/src/lapackpp ] +then + cd $HOME/src/lapackpp + git fetch --prune + git checkout v2024.05.31 + cd - +else + git clone -b v2024.05.31 https://github.com/icl-utk-edu/lapackpp.git $HOME/src/lapackpp +fi +rm -rf $HOME/src/lapackpp-a100-build +CXX=$(which g++) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B ${build_dir}/lapackpp-a100-build -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-2024.05.31 +cmake --build ${build_dir}/lapackpp-a100-build --target install --parallel 16 +rm -rf ${build_dir}/lapackpp-a100-build + +# heFFTe +if [ -d $HOME/src/heffte ] +then + cd $HOME/src/heffte + git fetch --prune + git checkout v2.4.0 + cd - +else + git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte +fi +rm -rf ${HOME}/src/heffte-a100-build +cmake \ + -S ${HOME}/src/heffte \ + -B ${build_dir}/heffte-a100-build \ + -DBUILD_SHARED_LIBS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ + -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0 \ + -DHeffte_DISABLE_GPU_AWARE_MPI=OFF \ + -DHeffte_ENABLE_AVX=OFF \ + -DHeffte_ENABLE_AVX512=OFF \ + -DHeffte_ENABLE_FFTW=OFF \ + -DHeffte_ENABLE_CUDA=ON \ + -DHeffte_ENABLE_ROCM=OFF \ + -DHeffte_ENABLE_ONEAPI=OFF \ + -DHeffte_ENABLE_MKL=OFF \ + -DHeffte_ENABLE_DOXYGEN=OFF \ + -DHeffte_SEQUENTIAL_TESTING=OFF \ + -DHeffte_ENABLE_TESTING=OFF \ + -DHeffte_ENABLE_TRACING=OFF \ + -DHeffte_ENABLE_PYTHON=OFF \ + -DHeffte_ENABLE_FORTRAN=OFF \ + -DHeffte_ENABLE_SWIG=OFF \ + -DHeffte_ENABLE_MAGMA=OFF +cmake --build 
${build_dir}/heffte-a100-build --target install --parallel 16 +rm -rf ${build_dir}/heffte-a100-build + + +# Python ###################################################################### +# +python3 -m pip install --upgrade pip +python3 -m pip install --upgrade virtualenv +python3 -m pip cache purge +rm -rf ${SW_DIR}/venvs/warpx-a100 +python3 -m venv ${SW_DIR}/venvs/warpx-a100 +source ${SW_DIR}/venvs/warpx-a100/bin/activate +python3 -m pip install --upgrade pip +python3 -m pip install --upgrade build +python3 -m pip install --upgrade packaging +python3 -m pip install --upgrade wheel +python3 -m pip install --upgrade setuptools +python3 -m pip install --upgrade cython +python3 -m pip install --upgrade numpy +python3 -m pip install --upgrade pandas +python3 -m pip install --upgrade scipy +python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py +python3 -m pip install --upgrade openpmd-api +python3 -m pip install --upgrade matplotlib +python3 -m pip install --upgrade yt +# install or update WarpX dependencies +python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt +#python3 -m pip install --upgrade cupy-cuda12x # CUDA 12 compatible wheel +# optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch) +#python3 -m pip install --upgrade torch # CUDA 12 compatible wheel +#python3 -m pip install --upgrade optimas[all] + + +# remove build temporary directory +rm -rf ${build_dir} diff --git a/Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch b/Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch new file mode 100644 index 00000000000..bef40942ed6 --- /dev/null +++ b/Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch @@ -0,0 +1,41 @@ +#!/bin/bash -l + +# Copyright 2021-2022 Axel Huebl, Kevin Gott +# +# This file is part of WarpX. 
+# +# License: BSD-3-Clause-LBNL + +#SBATCH -t 00:10:00 +#SBATCH -N 2 +#SBATCH -J WarpX +# note: must end on _g +#SBATCH -A +#SBATCH -q regular +#SBATCH -C gpu +#SBATCH --exclusive +#SBATCH --gpu-bind=none +#SBATCH --gpus-per-node=4 +#SBATCH -o WarpX.o%j +#SBATCH -e WarpX.e%j + +# executable & inputs file or python interpreter & PICMI script here +EXE=./warpx +INPUTS=inputs_small + +# pin to closest NIC to GPU +export MPICH_OFI_NIC_POLICY=GPU + +# threads for OpenMP and threaded compressors per MPI rank +export SRUN_CPUS_PER_TASK=32 + +# depends on https://github.com/ECP-WarpX/WarpX/issues/2009 +#GPU_AWARE_MPI="amrex.the_arena_is_managed=0 amrex.use_gpu_aware_mpi=1" +GPU_AWARE_MPI="" + +# CUDA visible devices are ordered inverse to local task IDs +# Reference: nvidia-smi topo -m +srun --cpu-bind=cores bash -c " + export CUDA_VISIBLE_DEVICES=\$((3-SLURM_LOCALID)); + ${EXE} ${INPUTS} ${GPU_AWARE_MPI}" \ + > output.txt diff --git a/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example b/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example new file mode 100644 index 00000000000..1b3e6a751e8 --- /dev/null +++ b/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example @@ -0,0 +1,56 @@ +# please set your project account +#export proj="_g" # change me + +# required dependencies +module load gcc/11.2.0 +module load cmake/3.24.2 +module load cuda/11.4 + +# optional: for QED support with detailed tables +module load boost/1.84 + +# optional: for openPMD and PSATD+RZ support +module load phdf5/1.10.4 + +SW_DIR="${WORK}/sw/lonestar6/sw/lonestar6/a100" +export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-2024.05.31:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-2024.05.31:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/heffte-2.4.0:$CMAKE_PREFIX_PATH + +export 
LD_LIBRARY_PATH=${SW_DIR}/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.8.3/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/blaspp-2024.05.31/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-2024.05.31/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/heffte-2.4.0/lib64:$LD_LIBRARY_PATH + +export PATH=${SW_DIR}/adios2-2.8.3/bin:${PATH} + +# optional: CCache +#module load ccache # TODO: request from support + +# optional: for Python bindings or libEnsemble +module load python3/3.9.7 + +if [ -d "${SW_DIR}/venvs/warpx-a100" ] +then + source ${SW_DIR}/venvs/warpx-a100/bin/activate +fi + +# an alias to request an interactive batch node for one hour +# for parallel execution, start on the batch node: srun +alias getNode="salloc -N 1 --ntasks-per-node=2 -t 1:00:00 -p gpu-a100 --gpu-bind=single:1 -c 32 -G 2 -A $proj" +# an alias to run a command on a batch node for up to 30min +# usage: runNode +alias runNode="srun -N 1 --ntasks-per-node=2 -t 0:30:00 -p gpu-a100 --gpu-bind=single:1 -c 32 -G 2 -A $proj" + +# optimize CUDA compilation for A100 +export AMREX_CUDA_ARCH=8.0 + +# compiler environment hints +export CC=$(which gcc) +export CXX=$(which g++) +export FC=$(which gfortran) +export CUDACXX=$(which nvcc) +export CUDAHOSTCXX=${CXX}