diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..9f5f8c3 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,754 @@ +name: Docker Slurm + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +env: + REGISTRY_FRONTEND_IMAGE: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-frontend + REGISTRY_MASTER_IMAGE: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-master + REGISTRY_NODE_IMAGE: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-node + AWS_REGION: us-east-2 + +jobs: + + build-frontend-arm64: + runs-on: LinuxARM64-8core-32G-300Gb + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + # Beta ARM runners do not have Docker installed + name: Install Docker + run: | + # Uninstall incompatible packages (-y: CI has no TTY to answer apt prompts) + for pkg in docker.io containerd runc; do sudo apt-get remove -y $pkg; done + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update -y + # Install docker packages + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + # Allow runner user to run docker without sudo + sudo usermod -aG docker $USER + sudo apt-get install -y acl + sudo setfacl --modify user:$USER:rw /var/run/docker.sock + - + name: Test Docker Installation + run: docker run hello-world + - + name: Install AWS CLI + run: sudo apt install -y awscli + - + name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ env.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ secrets.AWS_GITHUB_ROLE }} + role-duration-seconds: 21600 # 6 hours + role-session-name: spackstackslurmcluster-github-actions + - + name: Test authentication + run: | + aws sts get-caller-identity + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_FRONTEND_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + docker images + docker image prune -a -f + docker images + - + name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./frontend + file: ./frontend/Dockerfile + platforms: linux/arm64 + labels: ${{ steps.meta.outputs.labels }} + secrets: | + "access_key_id=${{ env.AWS_ACCESS_KEY_ID }}" + "secret_access_key=${{ env.AWS_SECRET_ACCESS_KEY }}" + "session_token=${{ env.AWS_SESSION_TOKEN }}" + cache-from: 
type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend-cache-arm64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend-cache-arm64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_FRONTEND_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: frontend-digests-linux-arm64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + - + name: Debug session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 60 + with: + limit-access-to-actor: true + + build-frontend-amd64: + runs-on: ubuntu2204-8c-32g-300ssd + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Install AWS CLI + run: sudo apt install -y awscli + - + name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ env.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ secrets.AWS_GITHUB_ROLE }} + role-duration-seconds: 21600 # 6 hours + role-session-name: spackstackslurmcluster-github-actions + - + name: Test authentication + run: | + aws sts get-caller-identity + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_FRONTEND_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + 
docker images + docker image prune -a -f + docker images + - + name: Build spack-stack and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./frontend + file: ./frontend/Dockerfile + platforms: linux/amd64 + secrets: | + "access_key_id=${{ env.AWS_ACCESS_KEY_ID }}" + "secret_access_key=${{ env.AWS_SECRET_ACCESS_KEY }}" + "session_token=${{ env.AWS_SESSION_TOKEN }}" + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend-cache-amd64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend-cache-amd64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_FRONTEND_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: frontend-digests-linux-amd64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + - + name: Debug session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 60 + with: + limit-access-to-actor: true + + merge-frontend: + runs-on: ubuntu-latest + needs: + - build-frontend-amd64 + - build-frontend-arm64 + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: frontend-digests-* + merge-multiple: true + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_FRONTEND_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: 
false + - + name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_FRONTEND_IMAGE }}@sha256:%s ' *) + - + name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_FRONTEND_IMAGE }}:${{ steps.meta.outputs.version }} + + build-master-arm64: + runs-on: LinuxARM64-8core-32G-300Gb + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + # Beta ARM runners do not have Docker installed + name: Install Docker + run: | + # Uninstall incompatible packages (-y: CI has no TTY to answer apt prompts) + for pkg in docker.io containerd runc; do sudo apt-get remove -y $pkg; done + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update -y + # Install docker packages + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + # Allow runner user to run docker without sudo + sudo usermod -aG docker $USER + sudo apt-get install -y acl + sudo setfacl --modify user:$USER:rw /var/run/docker.sock + - + name: Test Docker Installation + run: docker run hello-world + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_MASTER_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + docker images + docker image prune -a -f + docker images + - + name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./master + file: ./master/Dockerfile + platforms: linux/arm64 + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master-cache-arm64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master-cache-arm64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_MASTER_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: master-digests-linux-arm64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + 
build-master-amd64: + runs-on: ubuntu2204-8c-32g-300ssd + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_MASTER_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + docker images + docker image prune -a -f + docker images + - + name: Build spack-stack and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./master + file: ./master/Dockerfile + platforms: linux/amd64 + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master-cache-amd64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master-cache-amd64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_MASTER_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: master-digests-linux-amd64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge-master: + runs-on: ubuntu-latest + needs: + - build-master-amd64 + - build-master-arm64 + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: master-digests-* + merge-multiple: true + - + name: Set up 
Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_MASTER_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_MASTER_IMAGE }}@sha256:%s ' *) + - + name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_MASTER_IMAGE }}:${{ steps.meta.outputs.version }} + + build-node-arm64: + runs-on: LinuxARM64-8core-32G-300Gb + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + # Beta ARM runners do not have Docker installed + name: Install Docker + run: | + # Uninstall incompatible packages (-y: CI has no TTY to answer apt prompts) + for pkg in docker.io containerd runc; do sudo apt-get remove -y $pkg; done + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update -y + # Install docker packages + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + # Allow runner user to run docker without sudo + sudo usermod -aG docker $USER + sudo apt-get install -y acl + sudo setfacl --modify user:$USER:rw /var/run/docker.sock + - + name: Test Docker Installation + run: docker run hello-world + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_NODE_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + docker images + docker image prune -a -f + docker images + - + name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./node + file: ./node/Dockerfile + platforms: linux/arm64 + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node-cache-arm64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node-cache-arm64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_NODE_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: node-digests-linux-arm64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + build-node-amd64: + runs-on: 
ubuntu2204-8c-32g-300ssd + #needs: docker_compose_test + timeout-minutes: 360 + permissions: + packages: write + contents: read + id-token: write + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_NODE_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Prune pre-loaded GHA docker images + run: | + docker images + docker image prune -a -f + docker images + - + name: Build spack-stack and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./node + file: ./node/Dockerfile + platforms: linux/amd64 + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node-cache-amd64:cache + cache-to: type=registry,ref=ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node-cache-amd64:cache,mode=max + outputs: type=image,name=${{ env.REGISTRY_NODE_IMAGE }},push-by-digest=true,name-canonical=true,push=true + - + name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - + name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: node-digests-linux-amd64 + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge-node: + runs-on: ubuntu-latest + needs: + - build-node-amd64 + - build-node-arm64 + steps: + - + name: Checkout repository + uses: actions/checkout@v4 + - + name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: node-digests-* + merge-multiple: true + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 
+ - + name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_NODE_IMAGE }} + tags: | + type=raw,value=latest + flavor: | + latest=true + prefix= + suffix= + - + name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + logout: false + - + name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_NODE_IMAGE }}@sha256:%s ' *) + - + name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_NODE_IMAGE }}:${{ steps.meta.outputs.version }} + + docker-compose-test: + runs-on: ubuntu2204-8c-32g-300ssd + needs: + - merge-frontend + - merge-master + - merge-node + steps: + - + name: Checkout Repository + uses: actions/checkout@v4 + - + name: Build and start containers + run: docker compose -f docker-compose.yml up --build -d + + - + name: Check cluster logs + run: docker compose -f docker-compose.yml logs + + - + name: Check status of the cluster containers + run: docker compose -f docker-compose.yml ps + + - + name: Check status of Slurm + run: docker exec spack-stack-frontend sinfo + + - + name: Run a Slurm job + run: docker exec spack-stack-frontend srun hostname + + - + name: Test ssh access to Slurm compute nodes + run: | + docker exec spack-stack-frontend timeout 1s ssh slurmnode1 hostname + docker exec spack-stack-frontend timeout 1s ssh slurmnode2 hostname + docker exec spack-stack-frontend timeout 1s ssh slurmnode3 hostname + + - + name: Load spack-stack envs + run: | + docker exec spack-stack-frontend bash -l -c "module use /opt/spack-stack/envs/unified-env/install/modulefiles/Core ; module load stack-gcc stack-openmpi stack-python jedi-mpas-env; module list" + docker exec spack-stack-frontend bash -l -c "module use 
/opt/spack-stack/envs/unified-env/install/modulefiles/Core ; module load stack-gcc stack-openmpi stack-python jedi-fv3-env; module list" + + - + name: Shut down Slurm cluster containers + run: docker compose -f docker-compose.yml down diff --git a/.github/workflows/package-cleanup.yaml b/.github/workflows/package-cleanup.yaml new file mode 100644 index 0000000..9133316 --- /dev/null +++ b/.github/workflows/package-cleanup.yaml @@ -0,0 +1,88 @@ +name: PackageCleanup + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + cleanup-packages: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - + name: Remove untagged versions of dockerspackstackslurmcluster/slurm-frontend + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/slurm-frontend' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/slurm-master + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/slurm-master' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/slurm-node + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/slurm-node' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/frontend-cache-amd64 + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/frontend-cache-amd64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/frontend-cache-arm64 + uses: actions/delete-package-versions@v5 + with: + package-name: 
'dockerspackstackslurmcluster/frontend-cache-arm64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/master-cache-amd64 + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/master-cache-amd64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/master-cache-arm64 + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/master-cache-arm64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/node-cache-amd64 + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/node-cache-amd64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' + - + name: Remove untagged versions of dockerspackstackslurmcluster/node-cache-arm64 + uses: actions/delete-package-versions@v5 + with: + package-name: 'dockerspackstackslurmcluster/node-cache-arm64' + package-type: 'container' + min-versions-to-keep: 0 + delete-only-untagged-versions: 'true' diff --git a/docker-compose.yml b/docker-compose.yml index d6cb840..2c8f9fe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -46,9 +46,6 @@ services: links: - slurmmaster slurmnode2: - build: - context: ./node - dockerfile: ./Dockerfile image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest container_name: spack-stack-node2 hostname: slurmnode2 @@ -62,9 +59,6 @@ services: links: - slurmmaster slurmnode3: - build: - context: ./node - dockerfile: ./Dockerfile image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest container_name: spack-stack-node3 hostname: slurmnode3 @@ -78,9 +72,6 @@ services: links: - slurmmaster slurmnode4: - 
build: - context: ./node - dockerfile: ./Dockerfile image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest container_name: spack-stack-node4 hostname: slurmnode4 @@ -94,9 +85,6 @@ services: links: - slurmmaster slurmnode5: - build: - context: ./node - dockerfile: ./Dockerfile image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest container_name: spack-stack-node5 hostname: slurmnode5 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index eb1d2a0..3245dd4 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,22 +1,13 @@ -FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest +FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest AS builder ENV DEBIAN_FRONTEND=noninteractive ENV TZ=Etc/UTC SHELL ["/bin/bash", "-c"] -# Copy patch files into /tmp for later use -COPY cc.patch.aarch64 /tmp -COPY cc.patch.x86_64 /tmp -COPY openmpi.package.py.patch.aarch64 /tmp -COPY openmpi.package.py.patch.x86_64 /tmp - -RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_key --mount=type=secret,id=session_token <> /etc/bash.bashrc \ + && echo "source /usr/lmod/lmod/init/bash" >> /etc/profile \ + && popd \ + && rm -rf lua* Lmod + +# Copy patch files into /tmp for use when installing spack-stack +COPY cc.patch.aarch64 /tmp +COPY cc.patch.x86_64 /tmp +COPY openmpi.package.py.patch.aarch64 /tmp +COPY openmpi.package.py.patch.x86_64 /tmp - - # Install Lua - wget https://sourceforge.net/projects/lmod/files/lua-5.1.4.9.tar.bz2 - tar xvfj lua-5.1.4.9.tar.bz2 - pushd lua-5.1.4.9 - ./configure --prefix=/usr - make -j 4 - make install - popd - - # Install Lmod - git clone --recursive https://github.com/TACC/Lmod.git - pushd Lmod - ./configure --prefix=/usr - make -j 4 - make install - echo "source /usr/lmod/lmod/init/bash" >> /etc/bash.bashrc - popd - - # Cleanup - rm -rf lua* Lmod - +# Clone spack-stack and create and configure the unified env +RUN cd /opt \ + && git clone -b release/1.8.0 --recurse-submodules 
https://github.com/jcsda/spack-stack.git \ + && pushd spack-stack \ + && . ./setup.sh \ + && pushd spack \ + && mv /tmp/cc.patch.$(uname -m) cc.patch \ + && mv /tmp/openmpi.package.py.patch.$(uname -m) openmpi.package.py.patch \ + && patch -f -p0 < openmpi.package.py.patch \ + && patch -f -p0 < cc.patch \ + && popd \ + && spack stack create env --site linux.default --template unified-dev --name unified-env --compiler gcc \ + && pushd envs/unified-env \ + && spack env activate . \ + && spack mirror add --s3-access-key-id "" --s3-access-key-secret "" s3_spack_stack_buildcache_ro s3://chiltepin-us-east-2/spack-stack/ \ + && export SPACK_SYSTEM_CONFIG_PATH="$PWD/site" \ + && spack external find --scope system \ + --exclude cmake \ + --exclude curl \ + --exclude openssl \ + --exclude openssh \ + --exclude python \ + && spack external find --scope system wget \ + && spack compiler find --scope system \ + && unset SPACK_SYSTEM_CONFIG_PATH \ + && spack config add "packages:all:compiler:[gcc@11.4.0]" \ + && spack config add "packages:all:providers:mpi:[openmpi@4.1.6]" \ + && spack config add "packages:fontconfig:variants:+pic" \ + && spack config add "packages:pixman:variants:+pic" \ + && spack config add "packages:cairo:variants:+pic" \ + && spack config --scope env:/opt/spack-stack/envs/unified-env:common add "packages:openmpi:require:~internal-hwloc +two_level_namespace schedulers=slurm +pmi" \ + && echo " slurm:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \ + && echo " externals:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \ + && echo " - spec: slurm@23.11.7" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \ + && echo " prefix: /usr" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \ + && echo " buildable: false" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \ + && if [ "$(uname -m)" == "aarch64" ]; then \ + spack config --scope env:/opt/spack-stack/envs/unified-env:common remove "packages:wgrib2" ; \ + spack config 
--scope env:/opt/spack-stack/envs/unified-env:common remove "modules:default:lmod:wgrib2" ; \ + fi \ + && sed -i 's/tcl/lmod/g' site/modules.yaml \ + && sed -i 's/tcl/lmod/g' common/modules.yaml \ + && sed -i 's:{^mpi.name}/{^mpi.version}/{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml \ + && sed -i 's:{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml + +# Concretize the Spack environment +RUN cd /opt/spack-stack \ + && . ./setup.sh \ + && cd /opt/spack-stack/envs/unified-env \ + && spack env activate . \ + && spack concretize 2>&1 | tee log.concretize + +# Install the Spack environment +RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_key --mount=type=secret,id=session_token <> /opt/spack-stack/envs/unified-env/site/packages.yaml - echo " externals:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml - echo " - spec: slurm@23.11.7" >> /opt/spack-stack/envs/unified-env/site/packages.yaml - echo " prefix: /usr" >> /opt/spack-stack/envs/unified-env/site/packages.yaml - echo " buildable: false" >> /opt/spack-stack/envs/unified-env/site/packages.yaml - - # Make sure we use Lmod modules - sed -i 's/tcl/lmod/g' site/modules.yaml - sed -i 's/tcl/lmod/g' common/modules.yaml - - # Remove unwanted path prefixes for modules - sed -i 's:{^mpi.name}/{^mpi.version}/{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml - sed -i 's:{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml - - # Concretize the environment - spack concretize 2>&1 | tee log.concretize - spack mirror list - # Install the environment, use autopush buildcache if credentials were provided spack install --no-check-signature # 2>&1 | tee log.install if [ -f /run/secrets/access_key_id ]; then spack buildcache update-index s3_spack_stack_buildcache_rw fi +EOF - # Generate modules - source /usr/lmod/lmod/init/bash - spack module lmod refresh -y - spack stack setup-meta-modules - - # Cleanup unneccessary 
packages - spack gc -y - - # Do not strip binaries, it breaks JEDI/UFS builds - - # Cleanup unneeded .spack directories - find /opt/spack-stack/envs/unified-env/install/gcc/11.4.0 -name .spack -type d -print0 | xargs -0 rm -rf "{}" - rm -rf ~/.spack +# Create the modulefiles and cleanup +RUN cd /opt \ + && pushd spack-stack \ + && . ./setup.sh \ + && pushd envs/unified-env \ + && spack env activate . \ + && source /usr/lmod/lmod/init/bash \ + && spack module lmod refresh -y \ + && spack stack setup-meta-modules \ + && spack gc -y \ + && find /opt/spack-stack/envs/unified-env/install/gcc/11.4.0 -name .spack -type d -print0 | xargs -0 rm -rf \ + && rm -rf ~/.spack + +# Copy installed environment into final images +FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest - # Cleanup /tmp - rm -rf /tmp/* -EOF +COPY --from=builder /usr /usr +COPY --from=builder /etc /etc +COPY --from=builder /opt/spack-stack/envs/unified-env/install /opt/spack-stack/envs/unified-env/install