diff --git a/.github/workflows/test_eessi.yml b/.github/workflows/test-pilot.nessi.no.yml
similarity index 62%
rename from .github/workflows/test_eessi.yml
rename to .github/workflows/test-pilot.nessi.no.yml
index fee95f6b79..6342d5df19 100644
--- a/.github/workflows/test_eessi.yml
+++ b/.github/workflows/test-pilot.nessi.no.yml
@@ -1,80 +1,66 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
-name: Tests relying on having EESSI pilot repo mounted
+name: Check for missing software installations in pilot.nessi.no
on: [push, pull_request, workflow_dispatch]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
- pilot:
- runs-on: ubuntu-20.04
+ check_missing:
+ runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
EESSI_VERSION:
- 2023.06
- EESSI_SOFTWARE_SUBDIR:
+ EESSI_SOFTWARE_SUBDIR_OVERRIDE:
# - aarch64/generic
- x86_64/amd/zen2
- x86_64/intel/broadwell
-# - x86_64/intel/cascadelake
- x86_64/intel/skylake_avx512
- x86_64/generic
- EASYSTACK_FILE:
- - eessi-2023.06-eb-4.7.2-2021a.yml
- - eessi-2023.06-eb-4.7.2-2021b.yml
- - eessi-2023.06-eb-4.7.2-2022a.yml
- - eessi-2023.06-eb-4.7.2-2022b.yml
- - eessi-2023.06-eb-4.7.2-system.yml
- - eessi-2023.06-eb-4.8.0-system.yml
- - eessi-2023.06-eb-4.8.1-2022a.yml
- - eessi-2023.06-eb-4.8.1-system.yml
- - eessi-2023.06-eb-4.8.2-2022a.yml
steps:
- name: Check out software-layer repository
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0
- - name: Mount EESSI CernVM-FS pilot repository
+ - name: Mount NESSI CernVM-FS repository
uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
with:
cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
cvmfs_http_proxy: DIRECT
cvmfs_repositories: pilot.nessi.no
- - name: Test check_missing_installations.sh script with EESSI_SOFTWARE_SUBDIR_OVERRIDE
- if: '!cancelled()'
- run: |
- export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR}}
- source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
- module load EasyBuild
- eb --version
- export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
- export EESSI_OS_TYPE=linux
- env | grep ^EESSI | sort
- echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
- ./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
-
- - name: Test check_missing_installations.sh script without EESSI_SOFTWARE_SUBDIR_OVERRIDE
- if: '!cancelled()'
+ - name: Test check_missing_installations.sh script
run: |
+ export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
+ # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
+      # to prevent issues with checks in the EasyBuild configuration that use this variable
+ export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
+ which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
- export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
- echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
- ./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
+ echo "just run check_missing_installations.sh (should use easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)"
+ for easystack_file in $(ls easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
+ echo "check missing installations for ${easystack_file}..."
+ ./check_missing_installations.sh ${easystack_file}
+ ec=$?
+ if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi
+ done
- name: Test check_missing_installations.sh with missing package (GCC/8.3.0)
- if: '!cancelled()'
run: |
+ export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
+ # set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
+      # to prevent issues with checks in the EasyBuild configuration that use this variable
+ export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
- export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
# create dummy easystack file with a single entry (something that is not installed in EESSI)
easystack_file="test.yml"
diff --git a/.github/workflows/test_eessi_container_script.yml b/.github/workflows/test_eessi_container_script.yml
index 929fb22cec..3bb67b445f 100644
--- a/.github/workflows/test_eessi_container_script.yml
+++ b/.github/workflows/test_eessi_container_script.yml
@@ -45,7 +45,7 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_default' ]]; then
outfile=out_listrepos.txt
./eessi_container.sh --verbose --list-repos | tee ${outfile}
- grep "EESSI-pilot" ${outfile}
+ grep "EESSI" ${outfile}
# test use of --list-repos with custom repos.cfg
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_custom' ]]; then
@@ -57,7 +57,7 @@ jobs:
echo "[EESSI/20HT.TP]" >> cfg/repos.cfg
echo "repo_version = 20HT.TP" >> cfg/repos.cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile}
- grep "EESSI-pilot" ${outfile}
+ grep "EESSI" ${outfile}
export EESSI_REPOS_CFG_DIR_OVERRIDE=${PWD}/cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile2}
@@ -90,15 +90,15 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'readwrite' ]]; then
outfile=out_readwrite.txt
fn="test_${RANDOM}.txt"
- echo "touch /cvmfs/pilot.eessi-hpc.org/${fn}" > test_script.sh
+ echo "touch /cvmfs/pilot.nessi.no/${fn}" > test_script.sh
chmod u+x test_script.sh
export SINGULARITY_BIND="$PWD:/test"
./eessi_container.sh --verbose --access rw --mode run /test/test_script.sh > ${outfile}
tmpdir=$(grep "\-\-resume" ${outfile} | sed "s/.*--resume \([^']*\).*/\1/g")
# note: must use '--access rw' again here, since touched file is in overlay upper dir
- ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
- grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
+ ./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
+ grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
# test use of --resume
elif [[ ${{matrix.SCRIPT_TEST}} == 'resume' ]]; then
@@ -120,12 +120,12 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'save' ]]; then
outfile=out_save.txt
fn="test_${RANDOM}.txt"
- test_cmd="touch /cvmfs/pilot.eessi-hpc.org/${fn}"
+ test_cmd="touch /cvmfs/pilot.nessi.no/${fn}"
./eessi_container.sh --verbose --mode shell --access rw --save test-save.tar <<< "${test_cmd}" 2>&1 | tee ${outfile}
rm -f ${outfile}
- ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
- grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
+ ./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
+ grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
tar tfv test-save.tar | grep "overlay-upper/${fn}"
diff --git a/.github/workflows/test_licenses.yml b/.github/workflows/test_licenses.yml
new file mode 100644
index 0000000000..00a2c90f6b
--- /dev/null
+++ b/.github/workflows/test_licenses.yml
@@ -0,0 +1,20 @@
+# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
+name: Test software licenses
+on: [push, pull_request]
+permissions:
+ contents: read # to fetch code (actions/checkout)
+jobs:
+ build:
+ runs-on: ubuntu-20.04
+ steps:
+ - name: Check out software-layer repository
+ uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0
+
+ - name: set up Python
+ uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0
+ with:
+ python-version: '3.9'
+
+ - name: Check software licenses
+ run: |
+ python licenses/spdx.py licenses/licenses.json
diff --git a/.github/workflows/tests_archdetect.yml b/.github/workflows/tests_archdetect.yml
index 618f6eb142..74dbf032f0 100644
--- a/.github/workflows/tests_archdetect.yml
+++ b/.github/workflows/tests_archdetect.yml
@@ -13,24 +13,59 @@ jobs:
- x86_64/intel/skylake_avx512/archspec-linux-6132
- x86_64/amd/zen2/Azure-CentOS7-7V12
- x86_64/amd/zen3/Azure-CentOS7-7V73X
- - ppc64le/power9le/unknown-power9le
- - aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra
- - aarch64/arm/neoverse-n1/AWS-awslinux-graviton2
- - aarch64/arm/neoverse-v1/AWS-awslinux-graviton3
+ # commented out since these targets are currently not supported in pilot.nessi.no repo
+ # (and some tests assume that the corresponding subdirectory in software layer is there)
+ # - ppc64le/power9le/unknown-power9le
+ # - aarch64/neoverse-n1/Azure-Ubuntu20-Altra
+ # - aarch64/neoverse-n1/AWS-awslinux-graviton2
+ # - aarch64/neoverse-v1/AWS-awslinux-graviton3
fail-fast: false
steps:
- name: checkout
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0
+ - name: Mount NESSI CernVM-FS repository
+ uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
+ with:
+ cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
+ cvmfs_http_proxy: DIRECT
+ cvmfs_repositories: pilot.nessi.no
+
- name: test eessi_archdetect.sh
run: |
export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}}
export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*}
export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo
+ # check that printing of best match works correctly
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then
- echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2
+ echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH"
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2
exit 1
fi
+ # check that $EESSI_SOFTWARE_SUBDIR_OVERRIDE is honored
+ export EESSI_SOFTWARE_SUBDIR_OVERRIDE='dummy/cpu'
+ CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
+ if [[ $CPU_ARCH == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
+ echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE PASSED"
+ else
+ echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE FAILED" >&2
+ exit 1
+ fi
+ unset EESSI_SOFTWARE_SUBDIR_OVERRIDE
+ # check that printing of all matches works correctly (-a option for cpupath action)
+ CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath)
+ if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then
+ echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES"
+ else
+ echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2
+ exit 1
+ fi
+ # Check all those architectures actually exist (if this EESSI version has been populated already)
+ if [ -d ${EESSI_PREFIX}/software/linux ]; then
+ for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do
+ # Search all EESSI versions as we may drop support at some point
+ ls -d ${EESSI_PREFIX}/software/linux/${dir}
+ done
+ fi
diff --git a/.github/workflows/tests_scripts.yml b/.github/workflows/tests_scripts.yml
index 607e5c0744..a369f4f187 100644
--- a/.github/workflows/tests_scripts.yml
+++ b/.github/workflows/tests_scripts.yml
@@ -5,7 +5,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- - EESSI-pilot-install-software.sh
+ - EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
@@ -16,7 +16,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- - EESSI-pilot-install-software.sh
+ - EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
@@ -40,7 +40,7 @@ jobs:
# bind current directory into container as /software-layer
export SINGULARITY_BIND="${PWD}:/software-layer"
- # can't test with EasyBuild versions older than v4.5.2 when using EESSI pilot 2023.06,
+ # can't test with EasyBuild versions older than v4.5.2 when using EESSI 2023.06,
# since Python in compat layer is Python 3.11.x;
# testing with a single EasyBuild version takes a while in GitHub Actions, so stick to a single sensible version
for EB_VERSION in '4.6.0'; do
diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh
index 585312e8d6..eb616b1311 100755
--- a/EESSI-install-software.sh
+++ b/EESSI-install-software.sh
@@ -187,31 +187,46 @@ fi
# assume there's only one diff file that corresponds to the PR patch file
pr_diff=$(ls [0-9]*.diff | head -1)
+# install any additional required scripts
+# order is important: these are needed to install a full CUDA SDK in host_injections
+# for now, this just reinstalls all scripts. Not the most elegant, but works
+${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
+
+# Install full CUDA SDK in host_injections
+# Hardcode this for now, see if it works
+# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
+${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula
+
+# Install drivers in host_injections
+# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
+# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
+# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
+
# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z ${changed_easystacks} ]; then
echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here
else
for easystack_file in ${changed_easystacks}; do
-
+
echo -e "Processing easystack file ${easystack_file}...\n\n"
-
+
# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')
-
+
# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}
-
+
${EB} --show-config
-
+
echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."
-
+
if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."
-
+
${EB} --easystack ${TOPDIR}/${easystack_file} --robot
ec=$?
-
+
# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
@@ -221,18 +236,21 @@ else
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
fi
-
+
$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi
-
+
done
fi
+### add packages here
+
echo ">> Creating/updating Lmod cache..."
export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
-if [ ! -f $LMOD_RC ]; then
+lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
+if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then
python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH}
check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
fi
diff --git a/bot/build.sh b/bot/build.sh
index 458abaeebd..7eb15f319d 100755
--- a/bot/build.sh
+++ b/bot/build.sh
@@ -176,6 +176,11 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
declare -a BUILD_STEP_ARGS=()
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
+# add options required to handle NVIDIA support
+BUILD_STEP_ARGS+=("--nvidia" "all")
+if [[ ! -z ${SHARED_FS_PATH} ]]; then
+ BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
+fi
# prepare arguments to install_software_layer.sh (specific to build step)
declare -a INSTALL_SCRIPT_ARGS=()
diff --git a/bot/check-result.sh b/bot/check-result.sh
new file mode 120000
index 0000000000..02f753db50
--- /dev/null
+++ b/bot/check-result.sh
@@ -0,0 +1 @@
+check-build.sh
\ No newline at end of file
diff --git a/bot/check-test.sh b/bot/check-test.sh
new file mode 100755
index 0000000000..76e0df7f40
--- /dev/null
+++ b/bot/check-test.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Dummy script that only creates test result file for the bot, without actually checking anything
+#
+# This script is part of the EESSI software layer, see
+# https://github.com/EESSI/software-layer.git
+#
+# author: Kenneth Hoste (HPC-UGent)
+#
+# license: GPLv2
+#
+job_dir=${PWD}
+job_out="slurm-${SLURM_JOB_ID}.out"
+job_test_result_file="_bot_job${SLURM_JOB_ID}.test"
+
+echo "[TEST]" > ${job_test_result_file}
+echo "comment_description = (no tests yet)" >> ${job_test_result_file}
+echo "status = SUCCESS" >> ${job_test_result_file}
+
+exit 0
diff --git a/bot/inspect.sh b/bot/inspect.sh
new file mode 100755
index 0000000000..9d1fa87e1f
--- /dev/null
+++ b/bot/inspect.sh
@@ -0,0 +1,446 @@
+#!/usr/bin/env bash
+#
+# Script to inspect result of a build job for the EESSI software layer.
+# Intended use is that it is called with a path to a job directory.
+#
+# This script is part of the EESSI software layer, see
+# https://github.com/EESSI/software-layer.git
+#
+# author: Thomas Roeblitz (@trz42)
+#
+# license: GPLv2
+#
+
+# ASSUMPTIONs:
+# - Script is executed on the same architecture the job was running on.
+# - Initially, we also assume that is run on the same resource with the
+# same (compute) node setup (local disk space, HTTP proxies, etc.)
+# - The job directory being supplied has been prepared by the bot with a
+# checkout of a pull request (OR by some other means)
+# - The job directory contains a directory 'cfg' where the main config
+# file 'job.cfg' has been deposited.
+# - The 'cfg' directory may contain any additional files referenced in
+# 'job.cfg' (repos.cfg, etc.).
+# - The job produced some tarballs for its state (tmp disk for overlayfs,
+# CVMFS cache, etc.) under 'previous_tmp/{build,tarball}_step'.
+
+# stop as soon as something fails
+set -e
+
+display_help() {
+ echo "usage: $0 [OPTIONS]"
+ echo " -h | --help - display this usage information"
+ echo " -r | --resume TGZ - inspect job saved in tarball path TGZ; note, we assume the path"
+ echo " to be something like JOB_DIR/previous_tmp/{build,tarball}_step/TARBALL.tgz"
+ echo " and thus determine JOB_DIR from the given path"
+ echo " [default: none]"
+ echo " -c | --command COMMAND - command to execute inside the container, in the prefix environment"
+ echo " -x | --http-proxy URL - provides URL for the environment variable http_proxy"
+ echo " -y | --https-proxy URL - provides URL for the environment variable https_proxy"
+}
+
+resume_tgz=
+http_proxy=
+https_proxy=
+
+POSITIONAL_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+ case ${1} in
+ -h|--help)
+ display_help
+ exit 0
+ ;;
+ -r|--resume)
+ export resume_tgz="${2}"
+ shift 2
+ ;;
+ -x|--http-proxy)
+ export http_proxy="${2}"
+ shift 2
+ ;;
+ -y|--https-proxy)
+ export https_proxy="${2}"
+ shift 2
+ ;;
+ -c|--command)
+ export run_in_prefix="${2}"
+ shift 2
+ ;;
+ -*|--*)
+ echo "Error: Unknown option: ${1}" >&2
+ exit 1
+ ;;
+ *) # No more options
+ POSITIONAL_ARGS+=("${1}") # save positional arg
+ shift
+ ;;
+ esac
+done
+
+set -- "${POSITIONAL_ARGS[@]}"
+
+# source utils.sh and cfg_files.sh
+source scripts/utils.sh
+source scripts/cfg_files.sh
+
+if [[ -z ${resume_tgz} ]]; then
+ echo_red "path to tarball for resuming build job is missing"
+ display_help
+ exit 1
+fi
+
+job_dir=$(dirname $(dirname $(dirname ${resume_tgz})))
+
+if [[ -z ${job_dir} ]]; then
+    # job directory could not be determined
+ echo_red "job directory could not be determined from '${resume_tgz}'"
+ display_help
+ exit 2
+fi
+
+# defaults
+export JOB_CFG_FILE="${job_dir}/cfg/job.cfg"
+HOST_ARCH=$(uname -m)
+
+# check if ${JOB_CFG_FILE} exists
+if [[ ! -r "${JOB_CFG_FILE}" ]]; then
+ fatal_error "job config file (JOB_CFG_FILE=${JOB_CFG_FILE}) does not exist or not readable"
+fi
+echo "bot/inspect.sh: showing ${JOB_CFG_FILE} from software-layer side"
+cat ${JOB_CFG_FILE}
+
+echo "bot/inspect.sh: obtaining configuration settings from '${JOB_CFG_FILE}'"
+cfg_load ${JOB_CFG_FILE}
+
+# if http_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $http_proxy
+HTTP_PROXY=$(cfg_get_value "site_config" "http_proxy")
+HTTP_PROXY=${HTTP_PROXY:-${http_proxy}}
+echo "bot/inspect.sh: HTTP_PROXY='${HTTP_PROXY}'"
+
+# if https_proxy is defined in ${JOB_CFG_FILE} use it, if not use env var $https_proxy
+HTTPS_PROXY=$(cfg_get_value "site_config" "https_proxy")
+HTTPS_PROXY=${HTTPS_PROXY:-${https_proxy}}
+echo "bot/inspect.sh: HTTPS_PROXY='${HTTPS_PROXY}'"
+
+LOCAL_TMP=$(cfg_get_value "site_config" "local_tmp")
+echo "bot/inspect.sh: LOCAL_TMP='${LOCAL_TMP}'"
+# TODO should local_tmp be mandatory? --> then we check here and exit if it is not provided
+
+# check if path to copy build logs to is specified, so we can copy build logs for failing builds there
+BUILD_LOGS_DIR=$(cfg_get_value "site_config" "build_logs_dir")
+echo "bot/inspect.sh: BUILD_LOGS_DIR='${BUILD_LOGS_DIR}'"
+# if $BUILD_LOGS_DIR is set, add it to $SINGULARITY_BIND so the path is available in the build container
+if [[ ! -z ${BUILD_LOGS_DIR} ]]; then
+ mkdir -p ${BUILD_LOGS_DIR}
+ if [[ -z ${SINGULARITY_BIND} ]]; then
+ export SINGULARITY_BIND="${BUILD_LOGS_DIR}"
+ else
+ export SINGULARITY_BIND="${SINGULARITY_BIND},${BUILD_LOGS_DIR}"
+ fi
+fi
+
+SINGULARITY_CACHEDIR=$(cfg_get_value "site_config" "container_cachedir")
+echo "bot/inspect.sh: SINGULARITY_CACHEDIR='${SINGULARITY_CACHEDIR}'"
+if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then
+ # make sure that separate directories are used for different CPU families
+ SINGULARITY_CACHEDIR=${SINGULARITY_CACHEDIR}/${HOST_ARCH}
+ export SINGULARITY_CACHEDIR
+fi
+
+echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> "
+# replace any env variable in ${LOCAL_TMP} with its
+# current value (e.g., a value that is local to the job)
+STORAGE=$(envsubst <<< ${LOCAL_TMP})
+echo "'${STORAGE}'"
+
+# make sure ${STORAGE} exists
+mkdir -p ${STORAGE}
+
+# make sure the base tmp storage is unique
+JOB_STORAGE=$(mktemp --directory --tmpdir=${STORAGE} bot_job_tmp_XXX)
+echo "bot/inspect.sh: created unique base tmp storage directory at ${JOB_STORAGE}"
+
+# obtain list of modules to be loaded
+LOAD_MODULES=$(cfg_get_value "site_config" "load_modules")
+echo "bot/inspect.sh: LOAD_MODULES='${LOAD_MODULES}'"
+
+# singularity/apptainer settings: CONTAINER, HOME, TMPDIR, BIND
+CONTAINER=$(cfg_get_value "repository" "container")
+echo "bot/inspect.sh: CONTAINER='${CONTAINER}'"
+# instead of using ${PWD} as HOME in the container, we use the job directory
+# to have access to output files of the job
+export SINGULARITY_HOME="${job_dir}:/eessi_bot_job"
+echo "bot/inspect.sh: SINGULARITY_HOME='${SINGULARITY_HOME}'"
+export SINGULARITY_TMPDIR="${PWD}/singularity_tmpdir"
+echo "bot/inspect.sh: SINGULARITY_TMPDIR='${SINGULARITY_TMPDIR}'"
+mkdir -p ${SINGULARITY_TMPDIR}
+
+# load modules if LOAD_MODULES is not empty
+if [[ ! -z ${LOAD_MODULES} ]]; then
+ for mod in $(echo ${LOAD_MODULES} | tr ',' '\n')
+ do
+ echo "bot/inspect.sh: loading module '${mod}'"
+ module load ${mod}
+ done
+else
+ echo "bot/inspect.sh: no modules to be loaded"
+fi
+
+# determine repository to be used from entry .repository in ${JOB_CFG_FILE}
+REPOSITORY=$(cfg_get_value "repository" "repo_id")
+echo "bot/inspect.sh: REPOSITORY='${REPOSITORY}'"
+# TODO better to read this from tarball???
+EESSI_REPOS_CFG_DIR_OVERRIDE=$(cfg_get_value "repository" "repos_cfg_dir")
+export EESSI_REPOS_CFG_DIR_OVERRIDE=${EESSI_REPOS_CFG_DIR_OVERRIDE:-${PWD}/cfg}
+echo "bot/inspect.sh: EESSI_REPOS_CFG_DIR_OVERRIDE='${EESSI_REPOS_CFG_DIR_OVERRIDE}'"
+
+# determine EESSI version to be used from .repository.repo_version in ${JOB_CFG_FILE}
+# here, just set & export EESSI_VERSION_OVERRIDE
+# next script (eessi_container.sh) makes use of it via sourcing init scripts
+# (e.g., init/eessi_defaults or init/minimal_eessi_env)
+export EESSI_VERSION_OVERRIDE=$(cfg_get_value "repository" "repo_version")
+echo "bot/inspect.sh: EESSI_VERSION_OVERRIDE='${EESSI_VERSION_OVERRIDE}'"
+
+# determine CVMFS repo to be used from .repository.repo_name in ${JOB_CFG_FILE}
+# here, just set EESSI_CVMFS_REPO_OVERRIDE, a bit further down
+# "source init/eessi_defaults" via sourcing init/minimal_eessi_env
+export EESSI_CVMFS_REPO_OVERRIDE="/cvmfs/$(cfg_get_value 'repository' 'repo_name')"
+echo "bot/inspect.sh: EESSI_CVMFS_REPO_OVERRIDE='${EESSI_CVMFS_REPO_OVERRIDE}'"
+
+# determine architecture to be used from entry .architecture in ${JOB_CFG_FILE}
+# fallbacks:
+# - ${CPU_TARGET} handed over from bot
+# - left empty to let downstream script(s) determine subdir to be used
+EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(cfg_get_value "architecture" "software_subdir")
+EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}}
+export EESSI_SOFTWARE_SUBDIR_OVERRIDE
+echo "bot/inspect.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'"
+
+# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux)
+EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
+export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux}
+echo "bot/inspect.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'"
+
+# prepare arguments to eessi_container.sh common to build and tarball steps
+declare -a CMDLINE_ARGS=()
+CMDLINE_ARGS+=("--verbose")
+CMDLINE_ARGS+=("--access" "rw")
+CMDLINE_ARGS+=("--mode" "run")
+[[ ! -z ${CONTAINER} ]] && CMDLINE_ARGS+=("--container" "${CONTAINER}")
+[[ ! -z ${HTTP_PROXY} ]] && CMDLINE_ARGS+=("--http-proxy" "${HTTP_PROXY}")
+[[ ! -z ${HTTPS_PROXY} ]] && CMDLINE_ARGS+=("--https-proxy" "${HTTPS_PROXY}")
+[[ ! -z ${REPOSITORY} ]] && CMDLINE_ARGS+=("--repository" "${REPOSITORY}")
+
+[[ ! -z ${resume_tgz} ]] && CMDLINE_ARGS+=("--resume" "${resume_tgz}")
+
+# create a directory for creating temporary data and scripts for the inspection
+INSPECT_DIR=$(mktemp --directory --tmpdir=${PWD} inspect.XXX)
+if [[ -z ${SINGULARITY_BIND} ]]; then
+ export SINGULARITY_BIND="${INSPECT_DIR}:/inspect_eessi_build_job"
+else
+ export SINGULARITY_BIND="${SINGULARITY_BIND},${INSPECT_DIR}:/inspect_eessi_build_job"
+fi
+
+# add arguments for temporary storage and storing a tarball of tmp
+CMDLINE_ARGS+=("--save" "${INSPECT_DIR}")
+CMDLINE_ARGS+=("--storage" "${JOB_STORAGE}")
+
+# # prepare arguments to install_software_layer.sh (specific to build step)
+# declare -a INSTALL_SCRIPT_ARGS=()
+# if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then
+# INSTALL_SCRIPT_ARGS+=("--generic")
+# fi
+# [[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
+
+# make sure some environment settings are available inside the shell started via
+# startprefix
+base_dir=$(dirname $(realpath $0))
+# base_dir of inspect.sh script is '.../bot', 'init' dir is at the same level
+# TODO better use script from tarball???
+source ${base_dir}/../init/eessi_defaults
+
+if [ -z $EESSI_VERSION ]; then
+ echo "ERROR: \$EESSI_VERSION must be set!" >&2
+ exit 1
+fi
+EESSI_COMPAT_LAYER_DIR="${EESSI_CVMFS_REPO}/versions/${EESSI_VERSION}/compat/linux/$(uname -m)"
+
+# NOTE The below requires access to the CVMFS repository. We could make a first
+# test run with a container. For now we skip the test.
+# if [ ! -d ${EESSI_COMPAT_LAYER_DIR} ]; then
+# echo "ERROR: ${EESSI_COMPAT_LAYER_DIR} does not exist!" >&2
+# exit 1
+# fi
+
+# When we want to run a script with arguments, the next line ensures that we retain
+# these arguments.
+# INPUT=$(echo "$@")
+mkdir -p ${INSPECT_DIR}/scripts
+RESUME_SCRIPT=${INSPECT_DIR}/scripts/resume_env.sh
+echo "bot/inspect.sh: creating script '${RESUME_SCRIPT}' to resume environment settings"
+
+cat << EOF > ${RESUME_SCRIPT}
+#!${EESSI_COMPAT_LAYER_DIR}/bin/bash
+echo "Sourcing '\$BASH_SOURCE' to init bot environment of build job"
+EOF
+if [ ! -z ${SLURM_JOB_ID} ]; then
+ # TODO do we need the value at all? if so which one: current or of the job to
+ # inspect?
+ echo "export CURRENT_SLURM_JOB_ID=${SLURM_JOB_ID}" >> ${RESUME_SCRIPT}
+fi
+if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then
+ echo "export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" >> ${RESUME_SCRIPT}
+fi
+if [ ! -z ${EESSI_CVMFS_REPO_OVERRIDE} ]; then
+ echo "export EESSI_CVMFS_REPO_OVERRIDE=${EESSI_CVMFS_REPO_OVERRIDE}" >> ${RESUME_SCRIPT}
+fi
+if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then
+ echo "export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}" >> ${RESUME_SCRIPT}
+fi
+if [ ! -z ${http_proxy} ]; then
+ echo "export http_proxy=${http_proxy}" >> ${RESUME_SCRIPT}
+fi
+if [ ! -z ${https_proxy} ]; then
+ echo "export https_proxy=${https_proxy}" >> ${RESUME_SCRIPT}
+fi
+cat << 'EOF' >> ${RESUME_SCRIPT}
+TOPDIR=$(dirname $(realpath $BASH_SOURCE))
+
+source ${TOPDIR}/scripts/utils.sh
+
+# honor $TMPDIR if it is already defined, use /tmp otherwise
+if [ -z $TMPDIR ]; then
+ export WORKDIR=/tmp/$USER
+else
+ export WORKDIR=$TMPDIR/$USER
+fi
+
+TMPDIR=$(mktemp -d)
+
+echo ">> Setting up environment..."
+
+source $TOPDIR/init/minimal_eessi_env
+
+if [ -d $EESSI_CVMFS_REPO ]; then
+ echo_green "$EESSI_CVMFS_REPO available, OK!"
+else
+ fatal_error "$EESSI_CVMFS_REPO is not available!"
+fi
+
+# make sure we're in Prefix environment by checking $SHELL
+if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then
+ echo_green ">> It looks like we're in a Gentoo Prefix environment, good!"
+else
+ fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
+fi
+
+# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory
+export PYTHONPYCACHEPREFIX=$TMPDIR/pycache
+
+DETECTION_PARAMETERS=''
+GENERIC=0
+EB='eb'
+if [[ "$EASYBUILD_OPTARCH" == "GENERIC" || "$EESSI_SOFTWARE_SUBDIR_OVERRIDE" == *"/generic" ]]; then
+ echo_yellow ">> GENERIC build requested, taking appropriate measures!"
+ DETECTION_PARAMETERS="$DETECTION_PARAMETERS --generic"
+ GENERIC=1
+ export EASYBUILD_OPTARCH=GENERIC
+ EB='eb --optarch=GENERIC'
+fi
+
+echo ">> Determining software subdirectory to use for current build host..."
+if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then
+ export EESSI_SOFTWARE_SUBDIR_OVERRIDE=$(python3 $TOPDIR/eessi_software_subdir.py $DETECTION_PARAMETERS)
+ echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script"
+else
+ echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
+fi
+
+# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE)
+# $EESSI_SILENT - don't print any messages
+# $EESSI_BASIC_ENV - give a basic set of environment variables
+EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables
+
+if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then
+ fatal_error "Failed to determine software subdirectory?!"
+elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
+ fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!"
+else
+ echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!"
+fi
+
+echo ">> Initializing Lmod..."
+source $EPREFIX/usr/share/Lmod/init/bash
+ml_version_out=$TMPDIR/ml.out
+ml --version &> $ml_version_out
+if [[ $? -eq 0 ]]; then
+ echo_green ">> Found Lmod ${LMOD_VERSION}"
+else
+    fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out})"
+fi
+
+echo ">> Configuring EasyBuild..."
+source $TOPDIR/configure_easybuild
+
+echo ">> Setting up \$MODULEPATH..."
+# make sure no modules are loaded
+module --force purge
+# ignore current $MODULEPATH entirely
+module unuse $MODULEPATH
+module use $EASYBUILD_INSTALLPATH/modules/all
+if [[ -z ${MODULEPATH} ]]; then
+ fatal_error "Failed to set up \$MODULEPATH?!"
+else
+ echo_green ">> MODULEPATH set up: ${MODULEPATH}"
+fi
+
+echo
+echo_green "Build environment set up with install path ${EASYBUILD_INSTALLPATH}."
+echo
+echo "The build job can be inspected with the following resources:"
+echo " - job directory is $HOME (\$HOME), check for slurm-*.out file"
+echo " - temporary data of the job is available at /tmp"
+echo " - note, the prefix $EESSI_PREFIX is writable"
+echo
+echo "You may want to load an EasyBuild module. The inspect.sh script does not load"
+echo "that automatically, because multiple versions might have been used by the job."
+echo "Choose an EasyBuild version (see installed versions with 'module avail EasyBuild')"
+echo "and simply run"
+echo
+echo "module load EasyBuild/_VERSION_"
+echo
+echo "Replace _VERSION_ with the version you want to use."
+echo
+
+EOF
+chmod u+x ${RESUME_SCRIPT}
+
+# try to map it into the container's $HOME/.profile instead
+# TODO check if script already exists, if so change its name and source it at the beginning of the RESUME_SCRIPT
+if [[ -z ${SINGULARITY_BIND} ]]; then
+ export SINGULARITY_BIND="${RESUME_SCRIPT}:/eessi_bot_job/.profile"
+else
+ export SINGULARITY_BIND="${SINGULARITY_BIND},${RESUME_SCRIPT}:/eessi_bot_job/.profile"
+fi
+
+echo "Executing command to start interactive session to inspect build job:"
+# TODO possibly add information on how to init session after the prefix is
+# entered, initialization consists of
+# - environment variable settings (see 'run_in_compat_layer_env.sh')
+# - setup steps run in 'EESSI-install-software.sh'
+# These initializations are combined into a single script that is executed when
+# the shell in startprefix is started. We set the env variable BASH_ENV here.
+if [[ -z ${run_in_prefix} ]]; then
+ echo "./eessi_container.sh ${CMDLINE_ARGS[@]}"
+ echo " -- ${EESSI_COMPAT_LAYER_DIR}/startprefix"
+ ./eessi_container.sh "${CMDLINE_ARGS[@]}" \
+ -- ${EESSI_COMPAT_LAYER_DIR}/startprefix
+else
+ echo "./eessi_container.sh ${CMDLINE_ARGS[@]}"
+ echo " -- ${EESSI_COMPAT_LAYER_DIR}/startprefix <<< ${run_in_prefix}"
+ ./eessi_container.sh "${CMDLINE_ARGS[@]}" \
+ -- ${EESSI_COMPAT_LAYER_DIR}/startprefix <<< ${run_in_prefix}
+fi
+
+exit 0
diff --git a/bot/test.sh b/bot/test.sh
new file mode 100755
index 0000000000..9d978cdcd0
--- /dev/null
+++ b/bot/test.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# Dummy script, no tests yet
+#
+# This script is part of the EESSI software layer, see
+# https://github.com/EESSI/software-layer.git
+#
+# author: Kenneth Hoste (HPC-UGent)
+#
+# license: GPLv2
+#
+
+exit 0
diff --git a/configure_easybuild b/configure_easybuild
index 7dca1ce682..c1bd1d390b 100644
--- a/configure_easybuild
+++ b/configure_easybuild
@@ -26,7 +26,7 @@ fi
# note: filtering Bison may break some installations, like Qt5 (see https://github.com/EESSI/software-layer/issues/49)
# filtering pkg-config breaks R-bundle-Bioconductor installation (see also https://github.com/easybuilders/easybuild-easyconfigs/pull/11104)
# problems occur when filtering pkg-config with gnuplot too (picks up Lua 5.1 from $EPREFIX rather than from Lua 5.3 dependency)
-DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,Lua,M4,makeinfo,ncurses,util-linux,XZ,zlib
+DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib
# For aarch64 we need to also filter out Yasm.
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/11190
if [[ "$EESSI_CPU_FAMILY" == "aarch64" ]]; then
diff --git a/create_directory_tarballs.sh b/create_directory_tarballs.sh
index dd644ec800..2cd31f38ff 100755
--- a/create_directory_tarballs.sh
+++ b/create_directory_tarballs.sh
@@ -1,7 +1,5 @@
#!/bin/bash
-SOFTWARE_LAYER_TARBALL_URL=https://github.com/NorESSI/software-layer/tarball/nessi.no-2023.06
-
set -eo pipefail
if [ $# -ne 1 ]; then
@@ -11,6 +9,8 @@ fi
version=$1
+SOFTWARE_LAYER_TARBALL_URL=https://github.com/NorESSI/software-layer/tarball/nessi.no-2023.06
+
TOPDIR=$(dirname $(realpath $0))
source $TOPDIR/scripts/utils.sh
@@ -42,7 +42,7 @@ echo_green "Done! Created tarball ${tarname}."
tartmp=$(mktemp -t -d scripts.XXXXX)
mkdir "${tartmp}/${version}"
tarname="eessi-${version}-scripts-$(date +%s).tar.gz"
-curl -Ls ${SOFTWARE_LAYER_TARBALL_URL} | tar xzf - -C "${tartmp}/${version}" --strip-components=1 --no-wildcards-match-slash --wildcards '*/scripts/'
+curl -Ls ${SOFTWARE_LAYER_TARBALL_URL} | tar xzf - -C "${tartmp}/${version}" --strip-components=1 --no-wildcards-match-slash --wildcards '*/scripts/u*' '*/scripts/gpu_support/'
tar czf "${tarname}" -C "${tartmp}" "${version}"
rm -rf "${tartmp}"
diff --git a/create_lmodrc.py b/create_lmodrc.py
index ae65153a20..0e738a530e 100755
--- a/create_lmodrc.py
+++ b/create_lmodrc.py
@@ -17,6 +17,85 @@
}
"""
+GPU_LMOD_RC = """require("strict")
+local hook = require("Hook")
+local open = io.open
+
+local function read_file(path)
+ local file = open(path, "rb") -- r read mode and b binary mode
+ if not file then return nil end
+ local content = file:read "*a" -- *a or *all reads the whole file
+ file:close()
+ return content
+end
+
+local function cuda_enabled_load_hook(t)
+ local frameStk = require("FrameStk"):singleton()
+ local mt = frameStk:mt()
+ local simpleName = string.match(t.modFullName, "(.-)/")
+ -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
+ -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
+ -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
+ local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
+ if simpleName == 'CUDA' then
+ -- get the full host_injections path
+ local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
+ -- build final path where the CUDA software should be installed
+ local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
+ local cudaDirExists = isDir(cudaEasyBuildDir)
+ if not cudaDirExists then
+ local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
+ advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
+ advice = advice .. "can find it.\\n"
+ advice = advice .. refer_to_docs
+ LmodError("\\nYou requested to load ", simpleName, " ", advice)
+ end
+ end
+ -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
+ -- otherwise, refuse to load the requested module and print error message
+ local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
+ if haveGpu then
+ local arch = os.getenv("EESSI_CPU_FAMILY") or ""
+ local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
+ local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
+ local cudaDriverExists = isFile(cudaDriverFile)
+ local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
+ if not (cudaDriverExists or singularityCudaExists) then
+ local advice = "which relies on the CUDA runtime environment and driver libraries. "
+ advice = advice .. "In order to be able to use the module, you will need "
+ advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
+ advice = advice .. refer_to_docs
+ LmodError("\\nYou requested to load ", simpleName, " ", advice)
+ else
+ -- CUDA driver exists, now we check its version to see if an update is needed
+ if cudaDriverExists then
+ local cudaVersion = read_file(cudaVersionFile)
+ local cudaVersion_req = os.getenv("EESSICUDAVERSION")
+ -- driver CUDA versions don't give a patch version for CUDA
+ local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
+ local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
+ local driver_libs_need_update = false
+ if major < major_req then
+ driver_libs_need_update = true
+ elseif major == major_req then
+ if minor < minor_req then
+ driver_libs_need_update = true
+ end
+ end
+ if driver_libs_need_update == true then
+ local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
+ advice = advice .. "Please update your CUDA driver libraries and then "
+ advice = advice .. "let EESSI know about the update.\\n"
+ advice = advice .. refer_to_docs
+ LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
+ end
+ end
+ end
+ end
+end
+
+hook.register("load", cuda_enabled_load_hook)
+"""
def error(msg):
sys.stderr.write("ERROR: %s\n" % msg)
@@ -36,6 +115,7 @@ def error(msg):
'dot_lmod': DOT_LMOD,
'prefix': prefix,
}
+lmodrc_txt += '\n' + GPU_LMOD_RC
try:
os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True)
with open(lmodrc_path, 'w') as fp:
diff --git a/create_tarball.sh b/create_tarball.sh
index 8510caebf1..a619df9439 100755
--- a/create_tarball.sh
+++ b/create_tarball.sh
@@ -43,8 +43,14 @@ module_files_list=${tmpdir}/module_files.list.txt
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then
# include Lmod cache and configuration file (lmodrc.lua),
# skip whiteout files and backup copies of Lmod cache (spiderT.old.*)
- find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list}
+ find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list}
fi
+
+# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository
+if [ -d ${eessi_version}/scripts ]; then
+ find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list}
+fi
+
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then
# module files
find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list}
@@ -55,6 +61,7 @@ if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then
| grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \
>> ${module_files_list}
fi
+
if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then
# installation directories but only those for which module files were created
# Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua'
diff --git a/easystacks/pilot.nessi.no/2023.06/README.md b/easystacks/pilot.nessi.no/2023.06/README.md
new file mode 100644
index 0000000000..733ebf9475
--- /dev/null
+++ b/easystacks/pilot.nessi.no/2023.06/README.md
@@ -0,0 +1,7 @@
+File naming matters, since it determines the order in which easystack files are processed.
+
+Software installed with system toolchain should be installed first,
+this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` .
+
+CUDA installations must be done before CUDA is required as dependency for something
+built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` .
diff --git a/eb_hooks.py b/eb_hooks.py
index 9031d97831..7e899b502e 100644
--- a/eb_hooks.py
+++ b/eb_hooks.py
@@ -3,10 +3,12 @@
import os
import re
+import easybuild.tools.environment as env
from easybuild.easyblocks.generic.configuremake import obtain_config_guess
+from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS
from easybuild.tools.build_log import EasyBuildError, print_msg
from easybuild.tools.config import build_option, update_build_option
-from easybuild.tools.filetools import apply_regex_substitutions, copy_file, which
+from easybuild.tools.filetools import apply_regex_substitutions, copy_file, remove_file, symlink, which
from easybuild.tools.run import run_cmd
from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_cpu_features
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC
@@ -19,10 +21,12 @@
CPU_TARGET_NEOVERSE_V1 = 'aarch64/neoverse_v1'
-
+CPU_TARGET_AARCH64_GENERIC = 'aarch64/generic'
EESSI_RPATH_OVERRIDE_ATTR = 'orig_rpath_override_dirs'
+SYSTEM = EASYCONFIG_CONSTANTS['SYSTEM'][0]
+
def get_eessi_envvar(eessi_envvar):
"""Get an EESSI environment variable from the environment"""
@@ -65,6 +69,24 @@ def parse_hook(ec, *args, **kwargs):
if ec.name in PARSE_HOOKS:
PARSE_HOOKS[ec.name](ec, eprefix)
+ # inject the GPU property (if required)
+ ec = inject_gpu_property(ec)
+
+
+def post_ready_hook(self, *args, **kwargs):
+ """
+    Post-ready hook: limit parallelism for selected builds, because they require a lot of memory per used core.
+ """
+ # 'parallel' easyconfig parameter is set via EasyBlock.set_parallel in ready step based on available cores.
+    # here we reduce parallelism to only use half of that for selected software,
+ # to avoid failing builds/tests due to out-of-memory problems
+ if self.name in ['TensorFlow']:
+ parallel = self.cfg['parallel']
+ if parallel > 1:
+ self.cfg['parallel'] = parallel // 2
+ msg = "limiting parallelism to %s (was %s) for %s to avoid out-of-memory failures during building/testing"
+ print_msg(msg % (self.cfg['parallel'], parallel, self.name), log=self.log)
+
def pre_prepare_hook(self, *args, **kwargs):
"""Main pre-prepare hook: trigger custom functions."""
@@ -163,35 +185,60 @@ def parse_hook_fontconfig_add_fonts(ec, eprefix):
def parse_hook_openblas_relax_lapack_tests_num_errors(ec, eprefix):
- """Relax number of failing numerical LAPACK tests."""
+ """Relax number of failing numerical LAPACK tests for aarch64/* CPU targets for OpenBLAS < 0.3.23"""
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if ec.name == 'OpenBLAS':
- cfg_option = 'max_failing_lapack_tests_num_errors'
- num_errors = 302
- if get_cpu_architecture() == AARCH64:
- # relax number of failed numerical LAPACK tests
- ec[cfg_option] = num_errors
- print_msg("Set '%s = %d' in easyconfig for %s on AARCH64", cfg_option, num_errors, ec.name)
- else:
- print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
+ if LooseVersion(ec.version) < LooseVersion('0.3.23'):
+ # relax maximum number of failed numerical LAPACK tests for aarch64/neoverse_v1 CPU target
+ # since the default setting of 150 that works well on other aarch64 targets and x86_64 is a bit too strict
+ # See https://github.com/EESSI/software-layer/issues/314
+ cfg_option = 'max_failing_lapack_tests_num_errors'
+ if cpu_target == CPU_TARGET_NEOVERSE_V1:
+ orig_value = ec[cfg_option]
+ ec[cfg_option] = 400
+ print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
+ ec.name, ec[cfg_option], orig_value)
+ elif cpu_target == CPU_TARGET_AARCH64_GENERIC:
+ orig_value = ec[cfg_option]
+ ec[cfg_option] = 302
+ print_msg("Maximum number of failing LAPACK tests with numerical errors for %s relaxed to %s (was %s)",
+                          ec.name, ec[cfg_option], orig_value)
+ else:
+ print_msg("Not changing option %s for %s on non-AARCH64", cfg_option, ec.name)
else:
raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")
-def parse_hook_pillow_set_cpath_library_path(ec, eprefix):
- """Extend CPATH and LIBRARY_PATH environment variables using EESSI_EPREFIX."""
- if ec.name == 'Pillow':
- EESSI_CPATH = os.getenv('EESSI_EPREFIX') + '/usr/include'
- EESSI_LIB_PATH = os.getenv('EESSI_EPREFIX') + '/usr/lib64'
- print_msg("NOTE: Pillow has zlib as a dependancy, The original CPATH value: (%s) has been extended with (%s)",
- os.getenv('CPATH'), EESSI_CPATH)
- print_msg("NOTE: Pillow has zlib as a dependancy, The original LIBRARY_PATH value: (%s) has been extended with (%s)",
- os.getenv('LIBRARY_PATH'), EESSI_LIB_PATH)
- ec.log.info("NOTE: Pillow has zlib as a dependancy, The original CPATH value: (%s) has been extended with (%s)",
- os.getenv('CPATH'), EESSI_CPATH)
- ec.log.info("NOTE: Pillow has zlib as a dependancy, The original LIBRARY_VALUE value: (%s) has been extended with (%s)",
- os.getenv('LIBRARY_PATH'), EESSI_LIB_PATH)
- os.environ['CPATH'] = os.pathsep.join(filter(None,[os.environ.get('CPATH',''), EESSI_CPATH]))
- os.environ['LIBRARY_PATH'] = os.pathsep.join(filter(None,[os.environ.get('LIBRARY_PATH',''), EESSI_LIB_PATH]))
+def parse_hook_pybind11_replace_catch2(ec, eprefix):
+ """
+ Replace Catch2 build dependency in pybind11 easyconfigs with one that doesn't use system toolchain.
+ cfr. https://github.com/easybuilders/easybuild-easyconfigs/pull/19270
+ """
+ # this is mainly necessary to avoid that --missing keeps reporting Catch2/2.13.9 is missing,
+ # and to avoid that we need to use "--from-pr 19270" for every easyconfigs that (indirectly) depends on pybind11
+ if ec.name == 'pybind11' and ec.version in ['2.10.3', '2.11.1']:
+ build_deps = ec['builddependencies']
+ catch2_build_dep = None
+ catch2_name, catch2_version = ('Catch2', '2.13.9')
+ for idx, build_dep in enumerate(build_deps):
+ if build_dep[0] == catch2_name and build_dep[1] == catch2_version:
+ catch2_build_dep = build_dep
+ break
+ if catch2_build_dep and len(catch2_build_dep) == 4 and catch2_build_dep[3] == SYSTEM:
+ build_deps[idx] = (catch2_name, catch2_version)
+
+
+def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix):
+ """
+ Disable check for QtWebEngine in Qt5 as workaround for problem with determining glibc version.
+ """
+ if ec.name == 'Qt5':
+ # workaround for glibc version being reported as "UNKNOWN" in Gentoo Prefix environment by EasyBuild v4.7.2,
+ # see also https://github.com/easybuilders/easybuild-framework/pull/4290
+ ec['check_qtwebengine'] = False
+        print_msg("Checking for QtWebEngine in Qt5 installation has been disabled")
+ else:
+ raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!")
def parse_hook_ucx_eprefix(ec, eprefix):
@@ -218,6 +265,13 @@ def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs):
if build_option('optarch') == OPTARCH_GENERIC:
for step in ('build', 'test', 'install'):
self.cfg.update(f'{step}opts', "DYNAMIC_ARCH=1")
+
+ # use -mtune=generic rather than -mcpu=generic in $CFLAGS on aarch64,
+ # because -mcpu=generic implies a particular -march=armv* which clashes with those used by OpenBLAS
+ # when building with DYNAMIC_ARCH=1
+ if get_cpu_architecture() == AARCH64:
+ cflags = os.getenv('CFLAGS').replace('-mcpu=generic', '-mtune=generic')
+ env.setvar('CFLAGS', cflags)
else:
raise EasyBuildError("OpenBLAS-specific hook triggered for non-OpenBLAS easyconfig?!")
@@ -269,27 +323,254 @@ def pre_configure_hook_wrf_aarch64(self, *args, **kwargs):
raise EasyBuildError("WRF-specific hook triggered for non-WRF easyconfig?!")
+def pre_configure_hook_LAMMPS_aarch64(self, *args, **kwargs):
+ """
+ pre-configure hook for LAMMPS:
+ - set kokkos_arch on Aarch64
+ """
+
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+ if self.name == 'LAMMPS':
+ if self.version == '23Jun2022':
+ if get_cpu_architecture() == AARCH64:
+ if cpu_target == CPU_TARGET_AARCH64_GENERIC:
+ self.cfg['kokkos_arch'] = 'ARM80'
+ else:
+ self.cfg['kokkos_arch'] = 'ARM81'
+ else:
+ raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!")
+
+
+def pre_configure_hook_atspi2core_filter_ld_library_path(self, *args, **kwargs):
+ """
+ pre-configure hook for at-spi2-core:
+ - instruct GObject-Introspection's g-ir-scanner tool to not set $LD_LIBRARY_PATH
+ when EasyBuild is configured to filter it, see:
+ https://github.com/EESSI/software-layer/issues/196
+ """
+ if self.name == 'at-spi2-core':
+ if build_option('filter_env_vars') and 'LD_LIBRARY_PATH' in build_option('filter_env_vars'):
+ sed_cmd = 'sed -i "s/gir_extra_args = \[/gir_extra_args = \[\\n \'--lib-dirs-envvar=FILTER_LD_LIBRARY_PATH\',/g" %(start_dir)s/atspi/meson.build && '
+ self.cfg.update('preconfigopts', sed_cmd)
+ else:
+ raise EasyBuildError("at-spi2-core-specific hook triggered for non-at-spi2-core easyconfig?!")
+
+
def pre_test_hook(self,*args, **kwargs):
"""Main pre-test hook: trigger custom functions based on software name."""
if self.name in PRE_TEST_HOOKS:
PRE_TEST_HOOKS[self.name](self, *args, **kwargs)
+def pre_test_hook_ignore_failing_tests_ESPResSo(self, *args, **kwargs):
+ """
+ Pre-test hook for ESPResSo: skip failing tests, tests frequently timeout due to known bugs in ESPResSo v4.2.1
+ cfr. https://github.com/EESSI/software-layer/issues/363
+ """
+ if self.name == 'ESPResSo' and self.version == '4.2.1':
+ self.cfg['testopts'] = "|| echo 'ignoring failing tests (probably due to timeouts)'"
+
+
+def pre_test_hook_ignore_failing_tests_FFTWMPI(self, *args, **kwargs):
+ """
+ Pre-test hook for FFTW.MPI: skip failing tests for FFTW.MPI 3.3.10 on neoverse_v1
+ cfr. https://github.com/EESSI/software-layer/issues/325
+ """
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+ if self.name == 'FFTW.MPI' and self.version == '3.3.10' and cpu_target == CPU_TARGET_NEOVERSE_V1:
+ self.cfg['testopts'] = "|| echo ignoring failing tests"
+
+
def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs):
"""
- Pre-test hook for SciPy-bundle: skip failing tests for SciPy-bundle 2021.10 (currently the only version that is failing).
+ Pre-test hook for SciPy-bundle: skip failing tests for selected SciPy-bundle versions
+ In version 2021.10, 2 failing tests in scipy 1.6.3:
+ FAILED optimize/tests/test_linprog.py::TestLinprogIPSparse::test_bug_6139 - A...
+ FAILED optimize/tests/test_linprog.py::TestLinprogIPSparsePresolve::test_bug_6139
+ = 2 failed, 30554 passed, 2064 skipped, 10992 deselected, 76 xfailed, 7 xpassed, 40 warnings in 380.27s (0:06:20) =
+ In versions 2023.02, 2023.07, and 2023.11, 2 failing tests in scipy (versions 1.10.1, 1.11.1, 1.11.4):
+ FAILED scipy/spatial/tests/test_distance.py::TestPdist::test_pdist_correlation_iris
+ FAILED scipy/spatial/tests/test_distance.py::TestPdist::test_pdist_correlation_iris_float32
+ = 2 failed, 54409 passed, 3016 skipped, 223 xfailed, 13 xpassed, 10917 warnings in 892.04s (0:14:52) =
In previous versions we were not as strict yet on the numpy/SciPy tests
"""
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
- if self.name == 'SciPy-bundle' and self.version == '2021.10' and cpu_target == CPU_TARGET_NEOVERSE_V1:
+ scipy_bundle_versions = ('2021.10', '2023.02', '2023.07', '2023.11')
+ if self.name == 'SciPy-bundle' and self.version in scipy_bundle_versions and cpu_target == CPU_TARGET_NEOVERSE_V1:
+ self.cfg['testopts'] = "|| echo ignoring failing tests"
+
+def pre_test_hook_ignore_failing_tests_netCDF(self, *args, **kwargs):
+ """
+ Pre-test hook for netCDF: skip failing tests for selected netCDF versions on neoverse_v1
+ cfr. https://github.com/EESSI/software-layer/issues/425
+ The following tests are problematic:
+ 163 - nc_test4_run_par_test (Timeout)
+ 190 - h5_test_run_par_tests (Timeout)
+ A few other tests are skipped in the easyconfig and patches for similar issues, see above issue for details.
+ """
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+ if self.name == 'netCDF' and self.version == '4.9.2' and cpu_target == CPU_TARGET_NEOVERSE_V1:
self.cfg['testopts'] = "|| echo ignoring failing tests"
+def pre_single_extension_hook(ext, *args, **kwargs):
+ """Main pre-extension: trigger custom functions based on software name."""
+ if ext.name in PRE_SINGLE_EXTENSION_HOOKS:
+ PRE_SINGLE_EXTENSION_HOOKS[ext.name](ext, *args, **kwargs)
+
+
+def post_single_extension_hook(ext, *args, **kwargs):
+ """Main post-extension hook: trigger custom functions based on software name."""
+ if ext.name in POST_SINGLE_EXTENSION_HOOKS:
+ POST_SINGLE_EXTENSION_HOOKS[ext.name](ext, *args, **kwargs)
+
+
+def pre_single_extension_isoband(ext, *args, **kwargs):
+ """
+ Pre-extension hook for isoband R package, to fix build on top of recent glibc.
+ """
+ if ext.name == 'isoband' and LooseVersion(ext.version) < LooseVersion('0.2.5'):
+ # use constant value instead of SIGSTKSZ for stack size in vendored testthat included in isoband sources,
+ # cfr. https://github.com/r-lib/isoband/commit/6984e6ce8d977f06e0b5ff73f5d88e5c9a44c027
+ ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' src/testthat/vendor/catch.h && "
+
+
+def pre_single_extension_numpy(ext, *args, **kwargs):
+ """
+ Pre-extension hook for numpy, to change -march=native to -march=armv8.4-a for numpy 1.24.2
+ when building for aarch64/neoverse_v1 CPU target.
+ """
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+ if ext.name == 'numpy' and ext.version == '1.24.2' and cpu_target == CPU_TARGET_NEOVERSE_V1:
+ # note: this hook is called before build environment is set up (by calling toolchain.prepare()),
+ # so environment variables like $CFLAGS are not defined yet
+ # unsure which of these actually matter for numpy, so changing all of them
+ ext.orig_optarch = build_option('optarch')
+ update_build_option('optarch', 'march=armv8.4-a')
+
+
+def post_single_extension_numpy(ext, *args, **kwargs):
+ """
+ Post-extension hook for numpy, to reset 'optarch' build option.
+ """
+ cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
+ if ext.name == 'numpy' and ext.version == '1.24.2' and cpu_target == CPU_TARGET_NEOVERSE_V1:
+ update_build_option('optarch', ext.orig_optarch)
+
+
+def pre_single_extension_testthat(ext, *args, **kwargs):
+ """
+ Pre-extension hook for testthat R package, to fix build on top of recent glibc.
+ """
+ if ext.name == 'testthat' and LooseVersion(ext.version) < LooseVersion('3.1.0'):
+ # use constant value instead of SIGSTKSZ for stack size,
+ # cfr. https://github.com/r-lib/testthat/issues/1373 + https://github.com/r-lib/testthat/pull/1403
+ ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' inst/include/testthat/vendor/catch.h && "
+
+
+def post_sanitycheck_hook(self, *args, **kwargs):
+ """Main post-sanity-check hook: trigger custom functions based on software name."""
+ if self.name in POST_SANITYCHECK_HOOKS:
+ POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs)
+
+
+def post_sanitycheck_cuda(self, *args, **kwargs):
+ """
+ Remove files from CUDA installation that we are not allowed to ship,
+ and replace them with a symlink to a corresponding installation under host_injections.
+ """
+ if self.name == 'CUDA':
+ print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...")
+
+ # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped
+ eula_path = os.path.join(self.installdir, 'EULA.txt')
+ relevant_eula_lines = []
+ with open(eula_path) as infile:
+ copy = False
+ for line in infile:
+ if line.strip() == "2.6. Attachment A":
+ copy = True
+ continue
+ elif line.strip() == "2.7. Attachment B":
+ copy = False
+ continue
+ elif copy:
+ relevant_eula_lines.append(line)
+
+ # create list without file extensions, they're not really needed and they only complicate things
+ allowlist = ['EULA', 'README']
+ file_extensions = ['.so', '.a', '.h', '.bc']
+ for line in relevant_eula_lines:
+ for word in line.split():
+ if any(ext in word for ext in file_extensions):
+ allowlist.append(os.path.splitext(word)[0])
+ allowlist = sorted(set(allowlist))
+ self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist))
+
+ # Do some quick sanity checks for things we should or shouldn't have in the list
+ if 'nvcc' in allowlist:
+ raise EasyBuildError("Found 'nvcc' in allowlist: %s" % allowlist)
+ if 'libcudart' not in allowlist:
+ raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist)
+
+ # iterate over all files in the CUDA installation directory
+ for dir_path, _, files in os.walk(self.installdir):
+ for filename in files:
+ full_path = os.path.join(dir_path, filename)
+ # we only really care about real files, i.e. not symlinks
+ if not os.path.islink(full_path):
+ # check if the current file is part of the allowlist
+ basename = os.path.splitext(filename)[0]
+ if basename in allowlist:
+ self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path)
+ else:
+ self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s",
+ basename, full_path)
+ # if it is not in the allowlist, delete the file and create a symlink to host_injections
+ host_inj_path = full_path.replace('versions', 'host_injections')
+ # make sure source and target of symlink are not the same
+ if full_path == host_inj_path:
+ raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you "
+ "are using this hook for an EESSI installation?",
+ full_path, host_inj_path)
+ remove_file(full_path)
+ symlink(host_inj_path, full_path)
+ else:
+ raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!")
+
+
+def inject_gpu_property(ec):
+ """
+ Add 'gpu' property, via modluafooter easyconfig parameter
+ """
+ ec_dict = ec.asdict()
+ # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property
+ if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]):
+ ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version")
+ key = 'modluafooter'
+ value = 'add_property("arch","gpu")'
+ cuda_version = 0
+ for dep in iter(ec_dict['dependencies']):
+ # Make CUDA a build dependency only (rpathing saves us from link errors)
+ if 'CUDA' in dep[0]:
+ cuda_version = dep[1]
+ ec_dict['dependencies'].remove(dep)
+ if dep not in ec_dict['builddependencies']:
+ ec_dict['builddependencies'].append(dep)
+ value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version])
+ if key in ec_dict:
+ if not value in ec_dict[key]:
+ ec[key] = '\n'.join([ec_dict[key], value])
+ else:
+ ec[key] = value
+ return ec
+
PARSE_HOOKS = {
'CGAL': parse_hook_cgal_toolchainopts_precise,
'fontconfig': parse_hook_fontconfig_add_fonts,
'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors,
- 'Pillow': parse_hook_pillow_set_cpath_library_path,
+ 'pybind11': parse_hook_pybind11_replace_catch2,
+ 'Qt5': parse_hook_qt5_check_qtwebengine_disable,
'UCX': parse_hook_ucx_eprefix,
}
@@ -302,8 +583,27 @@ def pre_test_hook_ignore_failing_tests_SciPybundle(self, *args, **kwargs):
'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep,
'OpenBLAS': pre_configure_hook_openblas_optarch_generic,
'WRF': pre_configure_hook_wrf_aarch64,
+ 'LAMMPS': pre_configure_hook_LAMMPS_aarch64,
+ 'at-spi2-core': pre_configure_hook_atspi2core_filter_ld_library_path,
}
PRE_TEST_HOOKS = {
+ 'ESPResSo': pre_test_hook_ignore_failing_tests_ESPResSo,
+ 'FFTW.MPI': pre_test_hook_ignore_failing_tests_FFTWMPI,
'SciPy-bundle': pre_test_hook_ignore_failing_tests_SciPybundle,
+ 'netCDF': pre_test_hook_ignore_failing_tests_netCDF,
+}
+
+PRE_SINGLE_EXTENSION_HOOKS = {
+ 'isoband': pre_single_extension_isoband,
+ 'numpy': pre_single_extension_numpy,
+ 'testthat': pre_single_extension_testthat,
+}
+
+POST_SINGLE_EXTENSION_HOOKS = {
+ 'numpy': post_single_extension_numpy,
+}
+
+POST_SANITYCHECK_HOOKS = {
+ 'CUDA': post_sanitycheck_cuda,
}
diff --git a/eessi-2021.06.yml b/eessi-2021.06.yml
deleted file mode 100644
index 3587827746..0000000000
--- a/eessi-2021.06.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-software:
- R-bundle-Bioconductor:
- toolchains:
- foss-2020a:
- versions:
- '3.11':
- versionsuffix: -R-4.0.0
- GROMACS:
- toolchains:
- foss-2020a:
- versions:
- '2020.1':
- versionsuffix: -Python-3.8.2
- '2020.4':
- versionsuffix: -Python-3.8.2
- Horovod:
- toolchains:
- foss-2020a:
- versions:
- '0.21.3':
- versionsuffix: -TensorFlow-2.3.1-Python-3.8.2
- OpenFOAM:
- toolchains:
- foss-2020a:
- versions: ['8', 'v2006']
- OSU-Micro-Benchmarks:
- toolchains:
- gompi-2020a:
- versions: ['5.6.3']
- QuantumESPRESSO:
- toolchains:
- foss-2020a:
- versions: ['6.6']
- TensorFlow:
- toolchains:
- foss-2020a:
- versions:
- '2.3.1':
- versionsuffix: -Python-3.8.2
- RStudio-Server:
- toolchains:
- foss-2020a:
- versions:
- '1.3.1093':
- versionsuffix: -Java-11-R-4.0.0
- ReFrame:
- toolchains:
- SYSTEM:
- versions: '3.6.2'
- code-server:
- toolchains:
- SYSTEM:
- versions: '3.7.3'
diff --git a/eessi-2021.12.yml b/eessi-2021.12.yml
deleted file mode 100644
index 210bbb2845..0000000000
--- a/eessi-2021.12.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-software:
- code-server:
- toolchains:
- SYSTEM:
- versions: '3.7.3'
- GROMACS:
- toolchains:
- foss-2020a:
- versions:
- '2020.1':
- versionsuffix: -Python-3.8.2
- '2020.4':
- versionsuffix: -Python-3.8.2
- Horovod:
- toolchains:
- foss-2020a:
- versions:
- '0.21.3':
- versionsuffix: -TensorFlow-2.3.1-Python-3.8.2
- Nextflow:
- toolchains:
- SYSTEM:
- versions: '22.10.1'
- OpenFOAM:
- toolchains:
- foss-2020a:
- versions: ['8', 'v2006']
- OSU-Micro-Benchmarks:
- toolchains:
- gompi-2020a:
- versions: ['5.6.3']
- gompi-2021a:
- versions: ['5.7.1']
- QuantumESPRESSO:
- toolchains:
- foss-2020a:
- versions: ['6.6']
- R:
- toolchains:
- foss-2021a:
- versions: '4.1.0'
- R-bundle-Bioconductor:
- toolchains:
- foss-2020a:
- versions:
- '3.11':
- versionsuffix: -R-4.0.0
- RStudio-Server:
- toolchains:
- foss-2020a:
- versions:
- '1.3.1093':
- versionsuffix: -Java-11-R-4.0.0
- SciPy-bundle:
- toolchains:
- foss-2021a:
- versions: ['2021.05']
- TensorFlow:
- toolchains:
- foss-2020a:
- versions:
- '2.3.1':
- versionsuffix: -Python-3.8.2
- WRF:
- toolchains:
- foss-2020a:
- versions:
- '3.9.1.1':
- versionsuffix: -dmpar
diff --git a/eessi-2023.06-known-issues.yml b/eessi-2023.06-known-issues.yml
new file mode 100644
index 0000000000..475ee2c1d7
--- /dev/null
+++ b/eessi-2023.06-known-issues.yml
@@ -0,0 +1,28 @@
+- aarch64/neoverse_v1:
+ - ESPResSo-4.2.1-foss-2023a:
+ - issue: https://github.com/EESSI/software-layer/issues/363
+ - info: "ESPResSo tests failing due to timeouts"
+ - FFTW.MPI-3.3.10-gompi-2023a:
+ - issue: https://github.com/EESSI/software-layer/issues/325
+ - info: "Flaky FFTW tests, random failures"
+ - FFTW.MPI-3.3.10-gompi-2023b:
+ - issue: https://github.com/EESSI/software-layer/issues/325
+ - info: "Flaky FFTW tests, random failures"
  - netCDF-4.9.2-gompi-2023a:
+ - issue: https://github.com/EESSI/software-layer/issues/425
+ - info: "netCDF intermittent test failures"
  - netCDF-4.9.2-gompi-2023b:
+ - issue: https://github.com/EESSI/software-layer/issues/425
+ - info: "netCDF intermittent test failures"
+ - OpenBLAS-0.3.21-GCC-12.2.0:
+ - issue: https://github.com/EESSI/software-layer/issues/314
+ - info: "Increased number of numerical errors in OpenBLAS test suite (344 vs max. 150 on x86_64/*)"
+ - SciPy-bundle-2023.02-gfbf-2022b:
+ - issue: https://github.com/EESSI/software-layer/issues/318
+ - info: "numpy built with -march=armv8.4-a instead of -mcpu=native (no SVE) + 2 failing tests (vs 50005 passed) in scipy test suite"
+ - SciPy-bundle-2023.07-gfbf-2023a:
+ - issue: https://github.com/EESSI/software-layer/issues/318
+ - info: "2 failing tests (vs 54409 passed) in scipy test suite"
+ - SciPy-bundle-2023.11-gfbf-2023b:
+ - issue: https://github.com/EESSI/software-layer/issues/318
+ - info: "2 failing tests (vs 54876 passed) in scipy test suite"
diff --git a/eessi_container.sh b/eessi_container.sh
index 5e870c3b1b..6e68524edb 100755
--- a/eessi_container.sh
+++ b/eessi_container.sh
@@ -30,8 +30,8 @@
# -. initial settings & exit codes
TOPDIR=$(dirname $(realpath $0))
-source ${TOPDIR}/scripts/utils.sh
-source ${TOPDIR}/scripts/cfg_files.sh
+source "${TOPDIR}"/scripts/utils.sh
+source "${TOPDIR}"/scripts/cfg_files.sh
# exit codes: bitwise shift codes to allow for combination of exit codes
# ANY_ERROR_EXITCODE is sourced from ${TOPDIR}/scripts/utils.sh
@@ -46,6 +46,7 @@ SAVE_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 8))
HTTP_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 9))
HTTPS_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 10))
RUN_SCRIPT_MISSING_EXITCODE=$((${ANY_ERROR_EXITCODE} << 11))
+NVIDIA_MODE_UNKNOWN_EXITCODE=$((${ANY_ERROR_EXITCODE} << 12))
# CernVM-FS settings
CVMFS_VAR_LIB="var-lib-cvmfs"
@@ -72,12 +73,17 @@ display_help() {
echo " -a | --access {ro,rw} - ro (read-only), rw (read & write) [default: ro]"
echo " -c | --container IMG - image file or URL defining the container to use"
echo " [default: docker://ghcr.io/eessi/build-node:debian11]"
- echo " -h | --help - display this usage information [default: false]"
echo " -g | --storage DIR - directory space on host machine (used for"
echo " temporary data) [default: 1. TMPDIR, 2. /tmp]"
+ echo " -h | --help - display this usage information [default: false]"
+ echo " -i | --host-injections - directory to link to for host_injections "
+ echo " [default: /..storage../opt-eessi]"
echo " -l | --list-repos - list available repository identifiers [default: false]"
echo " -m | --mode MODE - with MODE==shell (launch interactive shell) or"
echo " MODE==run (run a script or command) [default: shell]"
+ echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs,"
+ echo " MODE==install for a CUDA installation, MODE==run to"
+ echo " attach a GPU, MODE==all for both [default: false]"
echo " -r | --repository CFG - configuration file or identifier defining the"
echo " repository to use [default: EESSI via"
echo " default container, see --container]"
@@ -111,6 +117,7 @@ VERBOSE=0
STORAGE=
LIST_REPOS=0
MODE="shell"
+SETUP_NVIDIA=0
REPOSITORY="EESSI"
RESUME=
SAVE=
@@ -141,6 +148,10 @@ while [[ $# -gt 0 ]]; do
display_help
exit 0
;;
+ -i|--host-injections)
+ USER_HOST_INJECTIONS="$2"
+ shift 2
+ ;;
-l|--list-repos)
LIST_REPOS=1
shift 1
@@ -149,6 +160,11 @@ while [[ $# -gt 0 ]]; do
MODE="$2"
shift 2
;;
+ -n|--nvidia)
+ SETUP_NVIDIA=1
+ NVIDIA_MODE="$2"
+ shift 2
+ ;;
-r|--repository)
REPOSITORY="$2"
shift 2
@@ -224,6 +240,13 @@ if [[ "${MODE}" != "shell" && "${MODE}" != "run" ]]; then
fatal_error "unknown execution mode '${MODE}'" "${MODE_UNKNOWN_EXITCODE}"
fi
+# Also validate the NVIDIA GPU mode (if present)
+if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
+ if [[ "${NVIDIA_MODE}" != "run" && "${NVIDIA_MODE}" != "install" && "${NVIDIA_MODE}" != "all" ]]; then
+ fatal_error "unknown NVIDIA mode '${NVIDIA_MODE}'" "${NVIDIA_MODE_UNKNOWN_EXITCODE}"
+ fi
+fi
+
# TODO (arg -r|--repository) check if repository is known
# REPOSITORY_ERROR_EXITCODE
if [[ ! -z "${REPOSITORY}" && "${REPOSITORY}" != "EESSI" && ! -r ${EESSI_REPOS_CFG_FILE} ]]; then
@@ -310,12 +333,25 @@ fi
# |-overlay-work
# |-home
# |-repos_cfg
+# |-opt-eessi (unless otherwise specified for host_injections)
# tmp dir for EESSI
EESSI_TMPDIR=${EESSI_HOST_STORAGE}
mkdir -p ${EESSI_TMPDIR}
[[ ${VERBOSE} -eq 1 ]] && echo "EESSI_TMPDIR=${EESSI_TMPDIR}"
+# Set host_injections directory and ensure it is a writable directory (if user provided)
+if [ -z ${USER_HOST_INJECTIONS+x} ]; then
+ # Not set, so use our default
+ HOST_INJECTIONS=${EESSI_TMPDIR}/opt-eessi
+ mkdir -p $HOST_INJECTIONS
+else
+ # Make sure the host_injections directory specified exists and is a folder
+ mkdir -p ${USER_HOST_INJECTIONS} || fatal_error "host_injections directory ${USER_HOST_INJECTIONS} is either not a directory or cannot be created"
+ HOST_INJECTIONS=${USER_HOST_INJECTIONS}
+fi
+[[ ${VERBOSE} -eq 1 ]] && echo "HOST_INJECTIONS=${HOST_INJECTIONS}"
+
# configure Singularity: if SINGULARITY_CACHEDIR is already defined, use that
# a global SINGULARITY_CACHEDIR would ensure that we don't consume
# storage space again and again for the container & also speed-up
@@ -394,12 +430,36 @@ fi
[[ ${VERBOSE} -eq 1 ]] && echo "SINGULARITY_HOME=${SINGULARITY_HOME}"
# define paths to add to SINGULARITY_BIND (added later when all BIND mounts are defined)
-BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs"
+BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs,${HOST_INJECTIONS}:/opt/eessi"
# provide a '/tmp' inside the container
BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}:${TMP_IN_CONTAINER}"
[[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
+declare -a ADDITIONAL_CONTAINER_OPTIONS=()
+
+# Configure anything we need for NVIDIA GPUs and CUDA installation
+if [[ ${SETUP_NVIDIA} -eq 1 ]]; then
+ if [[ "${NVIDIA_MODE}" == "run" || "${NVIDIA_MODE}" == "all" ]]; then
+ # Give singularity the appropriate flag
+ ADDITIONAL_CONTAINER_OPTIONS+=("--nv")
+ [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_CONTAINER_OPTIONS=${ADDITIONAL_CONTAINER_OPTIONS[@]}"
+ fi
+ if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then
+ # Add additional bind mounts to allow CUDA to install within a container
+ # (Experience tells us that these are necessary, but we don't know _why_
+ # as the CUDA installer is a black box. The suspicion is that the CUDA
+ # installer gets confused by the permissions on these directories when
+ # inside a container)
+ EESSI_VAR_LOG=${EESSI_TMPDIR}/var-log
+ EESSI_USR_LOCAL_CUDA=${EESSI_TMPDIR}/usr-local-cuda
+ mkdir -p ${EESSI_VAR_LOG}
+ mkdir -p ${EESSI_USR_LOCAL_CUDA}
+ BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda"
+ [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}"
+ fi
+fi
+
# set up repository config (always create directory repos_cfg and populate it with info when
# arg -r|--repository is used)
mkdir -p ${EESSI_TMPDIR}/repos_cfg
@@ -513,6 +573,10 @@ fi
# 4. set up vars and dirs specific to a scenario
declare -a EESSI_FUSE_MOUNTS=()
+
+# always mount cvmfs-config repo (to get access to software.eessi.io)
+# Commented out intentionally EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch")
+
if [[ "${ACCESS}" == "ro" ]]; then
export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}"
@@ -558,8 +622,8 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then
fi
echo "Launching container with command (next line):"
-echo "singularity ${RUN_QUIET} ${MODE} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@"
-singularity ${RUN_QUIET} ${MODE} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@"
+echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@"
+singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@"
exit_code=$?
# 6. save tmp if requested (arg -s|--save)
diff --git a/init/arch_specs/eessi_arch_arm.spec b/init/arch_specs/eessi_arch_arm.spec
index f8f21f9cd3..b5c9275043 100755
--- a/init/arch_specs/eessi_arch_arm.spec
+++ b/init/arch_specs/eessi_arch_arm.spec
@@ -1,6 +1,6 @@
# ARM CPU architecture specifications
# Software path in EESSI | Vendor ID | List of defining CPU features
-"aarch64/neoverse-n1" "ARM" "asimd" # Ampere Altra
-"aarch64/neoverse-n1" "" "asimd" # AWS Graviton2
-"aarch64/neoverse-v1" "ARM" "asimd svei8mm"
-"aarch64/neoverse-v1" "" "asimd svei8mm" # AWS Graviton3
+"aarch64/neoverse_n1" "ARM" "asimd" # Ampere Altra
+"aarch64/neoverse_n1" "" "asimd" # AWS Graviton2
+"aarch64/neoverse_v1" "ARM" "asimd svei8mm"
+"aarch64/neoverse_v1" "" "asimd svei8mm" # AWS Graviton3
diff --git a/init/bash b/init/bash
index 26598bb9dd..2097f03617 100644
--- a/init/bash
+++ b/init/bash
@@ -5,6 +5,7 @@ function show_msg {
echo "$msg"
fi
}
+
# The following method should be safe, but might break if file is a symlink
# (could switch to $(dirname "$(readlink -f "$BASH_SOURCE")") in that case)
source $(dirname "$BASH_SOURCE")/eessi_environment_variables
@@ -23,7 +24,7 @@ if [ $? -eq 0 ]; then
source $EESSI_EPREFIX/usr/share/Lmod/init/bash
# prepend location of modules for EESSI software stack to $MODULEPATH
- echo "Prepending $EESSI_MODULEPATH to \$MODULEPATH..." >> $output
+ show_msg "Prepending $EESSI_MODULEPATH to \$MODULEPATH..."
module use $EESSI_MODULEPATH
#show_msg ""
diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh
index 81846658c7..4dd2436cc1 100755
--- a/init/eessi_archdetect.sh
+++ b/init/eessi_archdetect.sh
@@ -70,7 +70,7 @@ check_allinfirst(){
cpupath(){
# If EESSI_SOFTWARE_SUBDIR_OVERRIDE is set, use it
log "DEBUG" "cpupath: Override variable set as '$EESSI_SOFTWARE_SUBDIR_OVERRIDE' "
- [ $EESI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} && exit
+ [ $EESSI_SOFTWARE_SUBDIR_OVERRIDE ] && echo ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} && exit
# Identify the best matching CPU architecture from a list of supported specifications for the host CPU
# Return the path to the installation files in NESSI of the best matching architecture
@@ -118,7 +118,7 @@ cpupath(){
# each flag in this CPU specification must be found in the list of flags of the host
check_allinfirst "${cpu_flags[*]}" ${arch_spec[2]} && best_arch_match=${arch_spec[0]} && \
all_arch_matches="$best_arch_match:$all_arch_matches" && \
- log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
+ log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
fi
done
diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables
index d4a2e72b36..8d149c4042 100644
--- a/init/eessi_environment_variables
+++ b/init/eessi_environment_variables
@@ -2,6 +2,11 @@
# $BASH_SOURCE points to correct path, see also http://mywiki.wooledge.org/BashFAQ/028
EESSI_INIT_DIR_PATH=$(dirname $(realpath $BASH_SOURCE))
+function error() {
+ echo -e "\e[31mERROR: $1\e[0m" >&2
+ false
+}
+
function show_msg {
# only echo msg if EESSI_SILENT is unset
msg=$1
@@ -10,11 +15,6 @@ function show_msg {
fi
}
-function error() {
- echo -e "\e[31mERROR: $1\e[0m" >&2
- false
-}
-
# set up minimal environment: $EESSI_PREFIX, $EESSI_VERSION, $EESSI_OS_TYPE, $EESSI_CPU_FAMILY, $EPREFIX
source $EESSI_INIT_DIR_PATH/minimal_eessi_env
diff --git a/install_scripts.sh b/install_scripts.sh
new file mode 100755
index 0000000000..6e6cd825ac
--- /dev/null
+++ b/install_scripts.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+#
+# Script to install scripts from the software-layer repo into the EESSI software stack
+
+display_help() {
+ echo "usage: $0 [OPTIONS]"
+ echo " -p | --prefix - prefix to copy the scripts to"
+ echo " -h | --help - display this usage information"
+}
+
+compare_and_copy() {
+ if [ "$#" -ne 2 ]; then
+ echo "Usage of function: compare_and_copy <source_file> <destination_file>"
+ return 1
+ fi
+
+ source_file="$1"
+ destination_file="$2"
+
+ if [ ! -f "$destination_file" ] || ! diff -q "$source_file" "$destination_file" ; then
+ cp "$source_file" "$destination_file"
+ echo "File $1 copied to $2"
+ else
+ echo "Files $1 and $2 are identical. No copy needed."
+ fi
+}
+
+
+POSITIONAL_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ -p|--prefix)
+ INSTALL_PREFIX="$2"
+ shift 2
+ ;;
+ -h|--help)
+ display_help # Call your function
+ # no shifting needed here, we're done.
+ exit 0
+ ;;
+ -*|--*)
+ echo "Error: Unknown option: $1" >&2
+ exit 1
+ ;;
+ *) # No more options
+ POSITIONAL_ARGS+=("$1") # save positional arg
+ shift
+ ;;
+ esac
+done
+
+set -- "${POSITIONAL_ARGS[@]}"
+
+TOPDIR=$(dirname $(realpath $0))
+
+# Subdirs for generic scripts
+SCRIPTS_DIR_SOURCE=${TOPDIR}/scripts # Source dir
+SCRIPTS_DIR_TARGET=${INSTALL_PREFIX}/scripts # Target dir
+
+# Create target dir
+mkdir -p ${SCRIPTS_DIR_TARGET}
+
+# Copy scripts into this prefix
+echo "copying scripts from ${SCRIPTS_DIR_SOURCE} to ${SCRIPTS_DIR_TARGET}"
+for file in utils.sh; do
+ compare_and_copy ${SCRIPTS_DIR_SOURCE}/${file} ${SCRIPTS_DIR_TARGET}/${file}
+done
+# Subdirs for GPU support
+NVIDIA_GPU_SUPPORT_DIR_SOURCE=${SCRIPTS_DIR_SOURCE}/gpu_support/nvidia # Source dir
+NVIDIA_GPU_SUPPORT_DIR_TARGET=${SCRIPTS_DIR_TARGET}/gpu_support/nvidia # Target dir
+
+# Create target dir
+mkdir -p ${NVIDIA_GPU_SUPPORT_DIR_TARGET}
+
+# Copy files from this directory into the prefix
+# To be on the safe side, we don't do recursive copies, but we are explicitly copying each individual file we want to add
+echo "copying scripts from ${NVIDIA_GPU_SUPPORT_DIR_SOURCE} to ${NVIDIA_GPU_SUPPORT_DIR_TARGET}"
+for file in install_cuda_host_injections.sh link_nvidia_host_libraries.sh; do
+ compare_and_copy ${NVIDIA_GPU_SUPPORT_DIR_SOURCE}/${file} ${NVIDIA_GPU_SUPPORT_DIR_TARGET}/${file}
+done
diff --git a/licenses/README.md b/licenses/README.md
new file mode 100644
index 0000000000..36a7615b21
--- /dev/null
+++ b/licenses/README.md
@@ -0,0 +1,3 @@
+see https://spdx.org/licenses
+
+Python function to download SPDX list of licenses is available in `spdx.py`
diff --git a/licenses/licenses.json b/licenses/licenses.json
new file mode 100644
index 0000000000..8831ed368c
--- /dev/null
+++ b/licenses/licenses.json
@@ -0,0 +1,10 @@
+{
+ "EasyBuild": {
+ "spdx": "GPL-2.0-only",
+ "license_url": "https://easybuild.io"
+ },
+ "GCCcore": {
+ "spdx": "GPL-2.0-with-GCC-exception",
+ "license_url": "https://github.com/gcc-mirror/gcc/blob/master/COPYING"
+ }
+}
diff --git a/licenses/spdx.py b/licenses/spdx.py
new file mode 100644
index 0000000000..06c3edb4e6
--- /dev/null
+++ b/licenses/spdx.py
@@ -0,0 +1,100 @@
+import json
+import logging
+import sys
+import urllib.request
+
+SPDX_LICENSE_LIST_URL = 'https://raw.githubusercontent.com/spdx/license-list-data/main/json/licenses.json'
+
+LICENSE_URL = 'license_url'
+SPDX = 'spdx'
+
+spdx_license_list = None
+
+# Configure the logging module
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+
+def get_spdx_license_list():
+ """
+ Download JSON file with current list of SPDX licenses, parse it, and return it as a Python dictionary.
+ """
+ global spdx_license_list
+
+ if spdx_license_list is None:
+ with urllib.request.urlopen(SPDX_LICENSE_LIST_URL) as fp:
+ spdx_license_list = json.load(fp)
+ version, release_date = spdx_license_list['licenseListVersion'], spdx_license_list['releaseDate']
+ logging.info(f"Downloaded version {version} of SPDX license list (release date: {release_date})")
+ licenses = spdx_license_list['licenses']
+ logging.info(f"Found info on {len(licenses)} licenses!")
+
+ return spdx_license_list
+
+
+def license_info(spdx_id):
+ """Find license with specified SPDX identifier."""
+
+ spdx_license_list = get_spdx_license_list()
+
+ licenses = spdx_license_list['licenses']
+ for lic in licenses:
+ if lic['licenseId'] == spdx_id:
+ return lic
+
+ # if no match is found, return None as result
+ return None
+
+
+def read_licenses(path):
+ """
+ Read software project to license mapping from specified path
+ """
+ with open(path) as fp:
+ licenses = json.loads(fp.read())
+
+ return licenses
+
+
+def check_licenses(licenses):
+ """
+ Check mapping of software licenses: make sure SPDX identifiers are valid.
+ """
+ faulty_licenses = {}
+
+ for software_name in licenses:
+ spdx_lic_id = licenses[software_name][SPDX]
+ lic_info = license_info(spdx_lic_id)
+ if lic_info:
+ lic_url = licenses[software_name][LICENSE_URL]
+ logging.info(f"License for software '{software_name}': {lic_info['name']} (see {lic_url})")
+ else:
+ logging.warning(f"Found faulty SPDX license ID for {software_name}: {spdx_lic_id}")
+ faulty_licenses[software_name] = spdx_lic_id
+
+ if faulty_licenses:
+ logging.warning(f"Found {len(faulty_licenses)} faulty SPDX license IDs (out of {len(licenses)})!")
+ result = False
+ else:
+ logging.info(f"License check passed for {len(licenses)} licenses!")
+ result = True
+
+ return result
+
+
+def main(args):
+ if len(args) == 1:
+ licenses_path = args[0]
+ else:
+ logging.error("Usage: python spdx.py <licenses.json>")
+ sys.exit(1)
+
+ licenses = read_licenses(licenses_path)
+ if check_licenses(licenses):
+ logging.info("All license checks PASSED!")
+ else:
+ logging.error("One or more licence checks failed!")
+ sys.exit(2)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/load_easybuild_module.sh b/load_easybuild_module.sh
index c23caff532..d1bfd18bb5 100755
--- a/load_easybuild_module.sh
+++ b/load_easybuild_module.sh
@@ -23,14 +23,14 @@ fi
EB_VERSION=${1}
# make sure that environment variables that we expect to be set are indeed set
-if [ -z "${TMPDIR}" ]; then
+if [ -z "${TMPDIR}" ]; then
echo "\$TMPDIR is not set" >&2
exit 2
fi
# ${EB} is used to specify which 'eb' command should be used;
# can potentially be more than just 'eb', for example when using 'eb --optarch=GENERIC'
-if [ -z "${EB}" ]; then
+if [ -z "${EB}" ]; then
echo "\$EB is not set" >&2
exit 2
fi
diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh
new file mode 100755
index 0000000000..a9310d817a
--- /dev/null
+++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh
@@ -0,0 +1,211 @@
+#!/usr/bin/env bash
+
+# This script can be used to install CUDA under the `.../host_injections` directory.
+# This provides the parts of the CUDA installation that cannot be redistributed as
+# part of EESSI due to license limitations. While GPU-based software from EESSI will
+# _run_ without these, installation of additional CUDA software requires the CUDA
+# installation(s) under `host_injections` to be present.
+#
+# The `host_injections` directory is a variant symlink that by default points to
+# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see
+# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the
+# installation to be successful, this directory needs to be writeable by the user
+# executing this script.
+
+# Initialise our bash functions
+TOPDIR=$(dirname $(realpath $BASH_SOURCE))
+source "$TOPDIR"/../../utils.sh
+
+# Function to display help message
+show_help() {
+ echo "Usage: $0 [OPTIONS]"
+ echo "Options:"
+ echo " --help Display this help message"
+ echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install"
+ echo " CUDA, see the EULA at"
+ echo " https://docs.nvidia.com/cuda/eula/index.html"
+ echo " -c, --cuda-version CUDA_VERSION Specify a version of CUDA to install (must"
+ echo " have a corresponding easyconfig in the"
+ echo " EasyBuild release)"
+ echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary"
+ echo " storage during the CUDA install"
+ echo " (must have >10GB available)"
+}
+
+# Initialize variables
+install_cuda_version=""
+eula_accepted=0
+
+# Parse command-line options
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --help)
+ show_help
+ exit 0
+ ;;
+ -c|--cuda-version)
+ if [ -n "$2" ]; then
+ install_cuda_version="$2"
+ shift 2
+ else
+ echo "Error: Argument required for $1"
+ show_help
+ exit 1
+ fi
+ ;;
+ --accept-cuda-eula)
+ eula_accepted=1
+ shift 1
+ ;;
+ -t|--temp-dir)
+ if [ -n "$2" ]; then
+ CUDA_TEMP_DIR="$2"
+ shift 2
+ else
+ echo "Error: Argument required for $1"
+ show_help
+ exit 1
+ fi
+ ;;
+ *)
+ show_help
+ fatal_error "Error: Unknown option: $1"
+ ;;
+ esac
+done
+
+# Make sure EESSI is initialised
+check_eessi_initialised
+
+# Make sure the CUDA version supplied is a semantic version
+is_semantic_version() {
+ local version=$1
+ local regex='^[0-9]+\.[0-9]+\.[0-9]+$'
+
+ if [[ $version =~ $regex ]]; then
+ return 0 # Return success (0) if it's a semantic version
+ else
+ return 1 # Return failure (1) if it's not a semantic version
+ fi
+}
+if ! is_semantic_version "$install_cuda_version"; then
+ show_help
+ error="\nYou must provide a semantic version for CUDA (e.g., 12.1.1) via the appropriate\n"
+ error="${error}command line option. This script is intended for use with EESSI so the 'correct'\n"
+ error="${error}version to provide is probably one of those available under\n"
+ error="${error}$EESSI_SOFTWARE_PATH/software/CUDA\n"
+ fatal_error "${error}"
+fi
+
+# Make sure they have accepted the CUDA EULA
+if [ "$eula_accepted" -ne 1 ]; then
+ show_help
+ error="\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n"
+ fatal_error "${error}"
+fi
+
+# As an installation location just use $EESSI_SOFTWARE_PATH but replacing `versions` with `host_injections`
+# (CUDA is a binary installation so no need to worry too much about the EasyBuild setup)
+cuda_install_parent=${EESSI_SOFTWARE_PATH/versions/host_injections}
+
+# Only install CUDA if specified version is not found.
+# (existence of easybuild subdir implies a successful install)
+if [ -d "${cuda_install_parent}"/software/CUDA/"${install_cuda_version}"/easybuild ]; then
+ echo_green "CUDA software found! No need to install CUDA again."
+else
+ # We need to be able write to the installation space so let's make sure we can
+ if ! create_directory_structure "${cuda_install_parent}"/software/CUDA ; then
+ fatal_error "No write permissions to directory ${cuda_install_parent}/software/CUDA"
+ fi
+
+ # we need a directory we can use for temporary storage
+ if [[ -z "${CUDA_TEMP_DIR}" ]]; then
+ tmpdir=$(mktemp -d)
+ else
+ tmpdir="${CUDA_TEMP_DIR}"/temp
+ if ! mkdir "$tmpdir" ; then
+ fatal_error "Could not create directory ${tmpdir}"
+ fi
+ fi
+
+ required_space_in_tmpdir=50000
+ # Let's see if we have sources and build locations defined if not, we use the temporary space
+ if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then
+ export EASYBUILD_BUILDPATH=${tmpdir}/build
+ required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000))
+ fi
+ if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then
+ export EASYBUILD_SOURCEPATH=${tmpdir}/sources
+ required_space_in_tmpdir=$((required_space_in_tmpdir + 5000000))
+ fi
+
+ # The install is pretty fat, you need lots of space for download/unpack/install (~3*5GB),
+ # need to do a space check before we proceed
+ avail_space=$(df --output=avail "${cuda_install_parent}"/ | tail -n 1 | awk '{print $1}')
+ if (( avail_space < 5000000 )); then
+ fatal_error "Need at least 5GB disk space to install CUDA under ${cuda_install_parent}, exiting now..."
+ fi
+ avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}')
+ if (( avail_space < required_space_in_tmpdir )); then
+ error="Need at least ${required_space_in_tmpdir} disk space under ${tmpdir}.\n"
+ error="${error}Set the environment variable CUDA_TEMP_DIR to a location with adequate space to pass this check."
+ error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH "
+ error="${error}to reduce this requirement. Exiting now..."
+ fatal_error "${error}"
+ fi
+
+ if ! command -v "eb" &>/dev/null; then
+ echo_yellow "Attempting to load an EasyBuild module to do actual install"
+ module load EasyBuild
+ # There are some scenarios where this may fail
+ if [ $? -ne 0 ]; then
+ error="'eb' command not found in your environment and\n"
+ error="${error} module load EasyBuild\n"
+ error="${error}failed for some reason.\n"
+ error="${error}Please re-run this script with the 'eb' command available."
+ fatal_error "${error}"
+ fi
+ fi
+
+ cuda_easyconfig="CUDA-${install_cuda_version}.eb"
+
+ # Check the easyconfig file is available in the release
+ # (eb search always returns 0, so we need a grep to ensure a usable exit code)
+ eb --search ^${cuda_easyconfig}|grep CUDA > /dev/null 2>&1
+ # Check the exit code
+ if [ $? -ne 0 ]; then
+ eb_version=$(eb --version)
+ available_cuda_easyconfigs=$(eb --search ^CUDA-*.eb|grep CUDA)
+
+ error="The easyconfig ${cuda_easyconfig} was not found in EasyBuild version:\n"
+ error="${error} ${eb_version}\n"
+ error="${error}You either need to give a different version of CUDA to install _or_ \n"
+ error="${error}use a different version of EasyBuild for the installation.\n"
+ error="${error}\nThe versions of CUDA available with the current eb command are:\n"
+ error="${error}${available_cuda_easyconfigs}"
+ fatal_error "${error}"
+ fi
+
+ # We need the --rebuild option, as the CUDA module may or may not be on the
+ # `MODULEPATH` yet. Even if it is, we still want to redo this installation
+ # since it will provide the symlinked targets for the parts of the CUDA
+ # installation in the `.../versions/...` prefix
+ # We install the module in our `tmpdir` since we do not need the modulefile,
+ # we only care about providing the targets for the symlinks.
+ extra_args="--rebuild --installpath-modules=${tmpdir}"
+
+ # We don't want hooks used in this install, we need a vanilla CUDA installation
+ touch "$tmpdir"/none.py
+ # shellcheck disable=SC2086 # Intended splitting of extra_args
+ eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}"
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ eb_last_log=$(unset EB_VERBOSE; eb --last-log)
+ cp -a ${eb_last_log} .
+ fatal_error "CUDA installation failed, please check EasyBuild logs $(basename ${eb_last_log})..."
+ else
+ echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!"
+ fi
+ # clean up tmpdir
+ rm -rf "${tmpdir}"
+fi
diff --git a/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
new file mode 100755
index 0000000000..e8d7f0d0a7
--- /dev/null
+++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+
+# This script links host libraries related to GPU drivers to a location where
+# they can be found by the EESSI linker
+
+# Initialise our bash functions
+TOPDIR=$(dirname $(realpath $BASH_SOURCE))
+source "$TOPDIR"/../../utils.sh
+
+# We rely on ldconfig to give us the location of the libraries on the host
+command_name="ldconfig"
+# We cannot use a version of ldconfig that's being shipped under CVMFS
+exclude_prefix="/cvmfs"
+
+found_paths=()
+# Always attempt to use /sbin/ldconfig
+if [ -x "/sbin/$command_name" ]; then
+ found_paths+=("/sbin/$command_name")
+fi
+IFS=':' read -ra path_dirs <<< "$PATH"
+for dir in "${path_dirs[@]}"; do
+ if [ "$dir" = "/sbin" ]; then
+ continue # we've already checked for $command_name in /sbin, don't need to do it twice
+ fi
+ if [[ ! "$dir" =~ ^$exclude_prefix ]]; then
+ if [ -x "$dir/$command_name" ]; then
+ found_paths+=("$dir/$command_name")
+ fi
+ fi
+done
+
+if [ ${#found_paths[@]} -gt 0 ]; then
+ echo "Found $command_name in the following locations:"
+ printf -- "- %s\n" "${found_paths[@]}"
+ echo "Using first version"
+ host_ldconfig=${found_paths[0]}
+else
+ error="$command_name not found in PATH or only found in paths starting with $exclude_prefix."
+ fatal_error "$error"
+fi
+
+# Make sure EESSI is initialised (doesn't matter what version)
+check_eessi_initialised
+
+# Find the CUDA version of the host CUDA drivers
+# (making sure that this can still work inside prefix environment inside a container)
+export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH
+nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader"
+if $nvidia_smi_command > /dev/null; then
+ host_driver_version=$($nvidia_smi_command | tail -n1)
+ echo_green "Found NVIDIA GPU driver version ${host_driver_version}"
+ # If the first worked, this should work too
+ host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}')
+ echo_green "Found host CUDA version ${host_cuda_version}"
+else
+ error="Failed to successfully execute\n $nvidia_smi_command\n"
+ fatal_error "$error"
+fi
+
+# Let's make sure the driver libraries are not already in place
+link_drivers=1
+
+# first make sure that target of host_injections variant symlink is an existing directory
+host_injections_target=$(realpath -m ${EESSI_CVMFS_REPO}/host_injections)
+if [ ! -d ${host_injections_target} ]; then
+ create_directory_structure ${host_injections_target}
+fi
+
+host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}"
+host_injection_driver_dir="${host_injections_nvidia_dir}/host"
+host_injection_driver_version_file="$host_injection_driver_dir/driver_version.txt"
+if [ -e "$host_injection_driver_version_file" ]; then
+ if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then
+ echo_green "The host GPU driver libraries (v${host_driver_version}) have already been linked! (based on ${host_injection_driver_version_file})"
+ link_drivers=0
+ else
+ # There's something there but it is out of date
+ echo_yellow "Cleaning out outdated symlinks"
+ rm $host_injection_driver_dir/*
+ if [ $? -ne 0 ]; then
+ error="Unable to remove files under '$host_injection_driver_dir'."
+ fatal_error "$error"
+ fi
+ fi
+fi
+
+drivers_linked=0
+if [ "$link_drivers" -eq 1 ]; then
+ if ! create_directory_structure "${host_injection_driver_dir}" ; then
+ fatal_error "No write permissions to directory ${host_injection_driver_dir}"
+ fi
+ cd ${host_injection_driver_dir}
+ # Need a small temporary space to hold a couple of files
+ temp_dir=$(mktemp -d)
+
+ # Gather libraries on the host (_must_ be host ldconfig)
+ $host_ldconfig -p | awk '{print $NF}' > "$temp_dir"/libs.txt
+ # Allow for the fact that we may be in a container so the CUDA libs might be in there
+ ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null
+
+ # Leverage singularity to find the full list of libraries we should be linking to
+ echo_yellow "Downloading latest version of nvliblist.conf from Apptainer to ${temp_dir}/nvliblist.conf"
+ curl --silent --output "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf
+
+ # Make symlinks to all the interesting libraries
+ grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {}
+
+ # Inject driver and CUDA versions into dir
+ echo $host_driver_version > driver_version.txt
+ echo $host_cuda_version > cuda_version.txt
+ drivers_linked=1
+
+ # Remove the temporary directory when done
+ rm -r "$temp_dir"
+fi
+
+# Make latest symlink for NVIDIA drivers
+cd $host_injections_nvidia_dir
+symlink="latest"
+if [ -L "$symlink" ]; then
+ # Unless the drivers have been installed, leave the symlink alone
+ if [ "$drivers_linked" -eq 1 ]; then
+ ln -sf host latest
+ fi
+else
+ # No link exists yet
+ ln -s host latest
+fi
+
+# Make sure the libraries can be found by the EESSI linker
+host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections}
+if [ -L "$host_injection_linker_dir/lib" ]; then
+ target_path=$(readlink -f "$host_injection_linker_dir/lib")
+ if [ "$target_path" != "$host_injections_nvidia_dir/latest" ]; then
+ cd $host_injection_linker_dir
+ ln -sf $host_injections_nvidia_dir/latest lib
+ fi
+else
+ create_directory_structure $host_injection_linker_dir
+ cd $host_injection_linker_dir
+ ln -s $host_injections_nvidia_dir/latest lib
+fi
+
+echo_green "Host NVIDIA GPU drivers linked successfully for EESSI"
diff --git a/scripts/utils.sh b/scripts/utils.sh
index d0da95e87f..b2be3f6221 100644
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -14,7 +14,7 @@ ANY_ERROR_EXITCODE=1
function fatal_error() {
echo_red "ERROR: $1" >&2
if [[ $# -gt 1 ]]; then
- exit $2
+ exit "$2"
else
exit "${ANY_ERROR_EXITCODE}"
fi
@@ -32,11 +32,57 @@ function check_exit_code {
fi
}
+function check_eessi_initialised() {
+ if [[ -z "${EESSI_SOFTWARE_PATH}" ]]; then
+ fatal_error "EESSI has not been initialised!"
+ else
+ return 0
+ fi
+}
+
+function check_in_prefix_shell() {
+ # Make sure EPREFIX is defined
+ if [[ -z "${EPREFIX}" ]]; then
+ fatal_error "This script cannot be used without having first defined EPREFIX"
+ fi
+ if [[ ! ${SHELL} = ${EPREFIX}/bin/bash ]]; then
+ fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
+ fi
+}
+
+function create_directory_structure() {
+ # Ensure we are given a single path argument
+ if [ $# -ne 1 ]; then
+ echo_red "Function requires a single (relative or absolute) path argument" >&2
+ return $ANY_ERROR_EXITCODE
+ fi
+ dir_structure="$1"
+
+ # Attempt to create the directory structure
+ error_message=$(mkdir -p "$dir_structure" 2>&1)
+ return_code=$?
+ # If it fails be explicit about the error
+ if [ ${return_code} -ne 0 ]; then
+ real_dir=$(realpath -m "$dir_structure")
+ echo_red "Creating ${dir_structure} (real path ${real_dir}) failed with:\n ${error_message}" >&2
+ else
+ # If we're creating it, our use case is that we want to be able to write there
+ # (this is a check in case the directory already existed)
+ if [ ! -w "${dir_structure}" ]; then
+ real_dir=$(realpath -m "$dir_structure")
+ echo_red "You do not have (required) write permissions to ${dir_structure} (real path ${real_dir})!"
+ return_code=$ANY_ERROR_EXITCODE
+ fi
+ fi
+
+ return $return_code
+}
+
function get_path_for_tool {
tool_name=$1
tool_envvar_name=$2
- which_out=$(which ${tool_name} 2>&1)
+ which_out=$(which "${tool_name}" 2>&1)
exit_code=$?
if [[ ${exit_code} -eq 0 ]]; then
echo "INFO: found tool ${tool_name} in PATH (${which_out})" >&2
@@ -68,7 +114,7 @@ function get_host_from_url {
url=$1
re="(http|https)://([^/:]+)"
if [[ $url =~ $re ]]; then
- echo ${BASH_REMATCH[2]}
+ echo "${BASH_REMATCH[2]}"
return 0
else
echo ""
@@ -80,7 +126,7 @@ function get_port_from_url {
url=$1
re="(http|https)://[^:]+:([0-9]+)"
if [[ $url =~ $re ]]; then
- echo ${BASH_REMATCH[2]}
+ echo "${BASH_REMATCH[2]}"
return 0
else
echo ""
@@ -90,7 +136,7 @@ function get_port_from_url {
function get_ipv4_address {
hname=$1
- hipv4=$(grep ${hname} /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1)
+ hipv4=$(grep "${hname}" /etc/hosts | grep -v '^[[:space:]]*#' | cut -d ' ' -f 1)
# TODO try other methods if the one above does not work --> tool that verifies
# what method can be used?
echo "${hipv4}"
diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output
deleted file mode 100644
index b4dc5e9f1b..0000000000
--- a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.output
+++ /dev/null
@@ -1 +0,0 @@
-aarch64/arm/neoverse-n1
diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output b/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output
deleted file mode 100644
index b4dc5e9f1b..0000000000
--- a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.output
+++ /dev/null
@@ -1 +0,0 @@
-aarch64/arm/neoverse-n1
diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output
deleted file mode 100644
index 20db96d01f..0000000000
--- a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.output
+++ /dev/null
@@ -1 +0,0 @@
-aarch64/arm/neoverse-v1
diff --git a/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.all.output b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.all.output
new file mode 100644
index 0000000000..340aaa5d02
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.all.output
@@ -0,0 +1 @@
+aarch64/neoverse_n1:aarch64/generic
diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.cpuinfo b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.cpuinfo
similarity index 100%
rename from tests/archdetect/aarch64/arm/neoverse-n1/AWS-awslinux-graviton2.cpuinfo
rename to tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.cpuinfo
diff --git a/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.output b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.output
new file mode 100644
index 0000000000..a9bd49c75c
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_n1/AWS-awslinux-graviton2.output
@@ -0,0 +1 @@
+aarch64/neoverse_n1
diff --git a/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.all.output b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.all.output
new file mode 100644
index 0000000000..340aaa5d02
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.all.output
@@ -0,0 +1 @@
+aarch64/neoverse_n1:aarch64/generic
diff --git a/tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.cpuinfo b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo
similarity index 100%
rename from tests/archdetect/aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra.cpuinfo
rename to tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.cpuinfo
diff --git a/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.output b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.output
new file mode 100644
index 0000000000..a9bd49c75c
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_n1/Azure-Ubuntu20-Altra.output
@@ -0,0 +1 @@
+aarch64/neoverse_n1
diff --git a/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.all.output b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.all.output
new file mode 100644
index 0000000000..920d5f9996
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.all.output
@@ -0,0 +1 @@
+aarch64/neoverse_v1:aarch64/neoverse_n1:aarch64/generic
diff --git a/tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.cpuinfo b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.cpuinfo
similarity index 100%
rename from tests/archdetect/aarch64/arm/neoverse-v1/AWS-awslinux-graviton3.cpuinfo
rename to tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.cpuinfo
diff --git a/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.output b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.output
new file mode 100644
index 0000000000..a8e072a9c6
--- /dev/null
+++ b/tests/archdetect/aarch64/neoverse_v1/AWS-awslinux-graviton3.output
@@ -0,0 +1 @@
+aarch64/neoverse_v1
diff --git a/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output b/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output
new file mode 100644
index 0000000000..7ecf79d0a7
--- /dev/null
+++ b/tests/archdetect/ppc64le/power9le/unknown-power9le.all.output
@@ -0,0 +1 @@
+ppc64le/power9le:ppc64le/generic
\ No newline at end of file
diff --git a/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output b/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output
new file mode 100644
index 0000000000..180de26f0e
--- /dev/null
+++ b/tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.all.output
@@ -0,0 +1 @@
+x86_64/amd/zen2:x86_64/generic
\ No newline at end of file
diff --git a/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output b/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output
new file mode 100644
index 0000000000..798a0aa565
--- /dev/null
+++ b/tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.all.output
@@ -0,0 +1 @@
+x86_64/amd/zen3:x86_64/amd/zen2:x86_64/generic
\ No newline at end of file
diff --git a/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output b/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output
new file mode 100644
index 0000000000..a047dd42cc
--- /dev/null
+++ b/tests/archdetect/x86_64/intel/haswell/archspec-linux-E5-2680-v3.all.output
@@ -0,0 +1 @@
+x86_64/intel/haswell:x86_64/generic
\ No newline at end of file
diff --git a/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output b/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output
new file mode 100644
index 0000000000..c9fa524ea6
--- /dev/null
+++ b/tests/archdetect/x86_64/intel/skylake_avx512/archspec-linux-6132.all.output
@@ -0,0 +1 @@
+x86_64/intel/skylake_avx512:x86_64/intel/haswell:x86_64/generic
\ No newline at end of file