Skip to content

Commit

Permalink
Merge pull request #112 from RIVM-bioinformatics/dev
Browse files Browse the repository at this point in the history
Containerization of workflow
  • Loading branch information
florianzwagemaker authored Jan 24, 2025
2 parents a6d8efb + 30be0df commit edf726c
Show file tree
Hide file tree
Showing 40 changed files with 1,622 additions and 255 deletions.
94 changes: 94 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# CI workflow: builds the workflow containers in one job and hands them to a
# second job (as an artifact) in which the tests are run.
name: build containers and run tests

# Runs for pull requests targeting `main`, or when started manually.
on:
pull_request:
branches:
- 'main'
workflow_dispatch:


jobs:
# Job 1: build the containers and upload them as the `built_containers` artifact.
Setup_and_build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

# Create the environment described in env.yml; later steps opt in to it via
# `shell: micromamba-shell {0}`.
- name: Setup Mamba
uses: mamba-org/setup-micromamba@v1
with:
cache-environment: true
post-cleanup: 'all'
environment-file: env.yml
init-shell: bash

# `--no-deps`: only the local package is installed by pip here.
- name: Install local python package
run: |
pip install . --no-deps
shell: micromamba-shell {0}

# The repository's GITHUB_TOKEN is exposed to the build script as `TOKEN`.
- name: build containers
run: |
python containers/build_containers.py
env:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: micromamba-shell {0}

# Bundle builtcontainers.json and every *.tar file found below ./containers/
# into a single archive.
# NOTE(review): `-printf '%f '` emits bare file names (directories removed), so
# this presumably assumes all *.tar files sit directly in ./containers/ — confirm.
- name: zip built containers
run: |
cd ./containers/
tar -czvf containers.tar.gz builtcontainers.json $(find . -type f -name "*.tar" -printf '%f ')
- name: Upload container artifacts
uses: actions/upload-artifact@v4
with:
name: built_containers
path: ./containers/containers.tar.gz
overwrite: true

# Job 2: runs only after Setup_and_build and consumes its `built_containers` artifact.
Test:
runs-on: ubuntu-latest
needs: Setup_and_build
steps:
- uses: actions/checkout@v3

- uses: actions/download-artifact@v4
with:
name: built_containers

# The downloaded archive is expected in the workspace root; move it into
# ./containers/ before unpacking it there.
- name: move artifact
run: |
mv ./containers.tar.gz ./containers/containers.tar.gz
- name: unzip built containers
run: |
cd ./containers/
tar -xzvf containers.tar.gz
cd ..
- name: Setup Apptainer
uses: eWaterCycle/setup-apptainer@v2

# Same environment setup as in the Setup_and_build job.
- name: Setup Mamba
uses: mamba-org/setup-micromamba@v1
with:
cache-environment: true
post-cleanup: 'all'
environment-file: env.yml
init-shell: bash

- name: Install local python package
run: |
pip install . --no-deps
shell: micromamba-shell {0}

# NOTE(review): unlike the other python steps, this one does not set
# `shell: micromamba-shell {0}`, so it runs with the runner's default shell —
# confirm the conversion script does not need the env.yml environment.
- name: convert containers
run: |
python containers/convert_artifact_containers_for_apptainer.py
- name: download existing containers
run: |
python containers/pull_published_containers.py
shell: micromamba-shell {0}

## rest of the testing suite here
63 changes: 63 additions & 0 deletions .github/workflows/publish_containers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Release workflow: takes the containers built by build_and_test.yml, loads
# them into the docker daemon, then tags and pushes them.
name: Publish containers

# Runs when a release is published, or when started manually.
on:
release:
types:
- published
workflow_dispatch:


jobs:
# NOTE(review): no `permissions:` block is declared; the push to ghcr.io relies
# on what GITHUB_TOKEN is granted by default — confirm it includes package write access.
Upload:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

# Fetch the `built_containers` artifact produced by a run of build_and_test.yml
# (a different workflow, hence the third-party download action).
- name: Download artifact
id: download-artifact
uses: dawidd6/action-download-artifact@v8
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: build_and_test.yml
name: built_containers
skip_unpack: false

# The unpacked artifact is expected to yield containers.tar.gz in the workspace
# root; move it into ./containers/ before extracting it there.
- name: move artifact
run: |
mv ./containers.tar.gz ./containers/containers.tar.gz
- name: unzip built containers
run: |
cd ./containers/
tar -xzvf containers.tar.gz
cd ..
- name: Setup Mamba
uses: mamba-org/setup-micromamba@v1
with:
cache-environment: true
post-cleanup: 'all'
environment-file: env.yml
init-shell: bash

# `--no-deps`: only the local package is installed by pip here.
- name: Install local python package
run: |
pip install . --no-deps
shell: micromamba-shell {0}

# Authenticate against ghcr.io as the user that triggered the run, using the
# repository's GITHUB_TOKEN.
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Load the extracted container archives into the local docker daemon.
- name: Add artifacted containers to docker daemon
run: |
python containers/add_OCI_to_docker_engine.py
shell: micromamba-shell {0}

# Tag the loaded images and push them (done by the repository script).
- name: tag and push containers
run: |
python containers/tag_and_push_containers.py
shell: micromamba-shell {0}
59 changes: 50 additions & 9 deletions ViroConstrictor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
from ViroConstrictor.runconfigs import GetSnakemakeRunDetails, WriteYaml
from ViroConstrictor.runreport import WriteReport
from ViroConstrictor.update import update
from ViroConstrictor.workflow.containers import (
construct_container_bind_args,
download_containers,
)


def get_preset_warning_list(
Expand Down Expand Up @@ -67,7 +71,13 @@ def get_preset_warning_list(
This applies to the following samples:\n{''.join(samples)}"""
preset_score_warnings.append(warn)

p_fallbackwarning_df = sample_info_df.loc[sample_info_df["PRESET_SCORE"] == 0.0]
# Check whether the preset score is greater than or equal to 0.0 and smaller than 0.000001 (1e-6).
# We use a range check because the preset score is a float, and floating-point equality checks are not reliable.
p_fallbackwarning_df = sample_info_df.loc[
(sample_info_df["PRESET_SCORE"] >= 0.0)
& (sample_info_df["PRESET_SCORE"] < 1e-6)
]

targets, presets = (
(
list(x)
Expand Down Expand Up @@ -151,7 +161,19 @@ def main() -> NoReturn:
inputs_obj=parsed_input, samplesheetfilename="samples_main"
)

# if configured to use containers, check if they are available and download them if necessary
# TODO: add the verbosity flag to the download_containers function and update log message to reflect this
if (
snakemake_run_details.snakemake_run_conf["use-singularity"]
and download_containers(snakemake_run_details.snakemake_run_conf) != 0
):
log.error(
"Failed to download containers required for workflow.\nPlease check the logs and your settings for more information and try again later."
)
sys.exit(1)

log.info(f"{'='*20} [bold yellow] Starting Main Workflow [/bold yellow] {'='*20}")

status: bool = False
if parsed_input.user_config["COMPUTING"]["compmode"] == "local":
status = snakemake.snakemake(
Expand All @@ -160,22 +182,29 @@ def main() -> NoReturn:
cores=snakemake_run_details.snakemake_run_conf["cores"],
use_conda=snakemake_run_details.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemake_run_details.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(parsed_input.samples_dict),
jobname=snakemake_run_details.snakemake_run_conf["jobname"],
latency_wait=snakemake_run_details.snakemake_run_conf["latency-wait"],
dryrun=snakemake_run_details.snakemake_run_conf["dryrun"],
force_incomplete=snakemake_run_details.snakemake_run_conf["force-incomplete"],
configfiles=[
WriteYaml(
snakemake_run_details.snakemake_run_parameters,
f"{parsed_input.workdir}/config/run_params.yaml",
)
),
WriteYaml(
snakemake_run_details.snakemake_run_conf,
f"{parsed_input.workdir}/config/run_configs.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemake_run_details.snakemake_run_conf["restart-times"],
keepgoing=snakemake_run_details.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=parsed_input.logfile)
],
printshellcmds=False,
printshellcmds=snakemake_run_details.snakemake_run_conf["printshellcmds"],
)
if parsed_input.user_config["COMPUTING"]["compmode"] == "grid":
status = snakemake.snakemake(
Expand All @@ -185,23 +214,31 @@ def main() -> NoReturn:
nodes=snakemake_run_details.snakemake_run_conf["cores"],
use_conda=snakemake_run_details.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemake_run_details.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(parsed_input.samples_dict),
jobname=snakemake_run_details.snakemake_run_conf["jobname"],
latency_wait=snakemake_run_details.snakemake_run_conf["latency-wait"],
drmaa=snakemake_run_details.snakemake_run_conf["drmaa"],
drmaa_log_dir=snakemake_run_details.snakemake_run_conf["drmaa-log-dir"],
dryrun=snakemake_run_details.snakemake_run_conf["dryrun"],
force_incomplete=snakemake_run_details.snakemake_run_conf["force-incomplete"],
configfiles=[
WriteYaml(
snakemake_run_details.snakemake_run_parameters,
f"{parsed_input.workdir}/config/run_params.yaml",
)
),
WriteYaml(
snakemake_run_details.snakemake_run_conf,
f"{parsed_input.workdir}/config/run_configs.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemake_run_details.snakemake_run_conf["restart-times"],
keepgoing=snakemake_run_details.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=parsed_input.logfile)
],
printshellcmds=snakemake_run_details.snakemake_run_conf["printshellcmds"],
)

if snakemake_run_details.snakemake_run_conf["dryrun"] is False and status is True:
Expand All @@ -213,7 +250,11 @@ def main() -> NoReturn:
WriteYaml(
snakemake_run_details.snakemake_run_parameters,
f"{parsed_input.workdir}/config/run_params.yaml",
)
),
WriteYaml(
snakemake_run_details.snakemake_run_conf,
f"{parsed_input.workdir}/config/run_configs.yaml",
),
],
quiet=["all"], # type: ignore
log_handler=[
Expand Down
5 changes: 3 additions & 2 deletions ViroConstrictor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _split_paragraphs(self, text: str) -> list[str]:
"""Split text in to paragraphs of like-indented lines."""

text = textwrap.dedent(text).strip()
text = re.sub("\n\n[\n]+", "\n\n", text)
text = re.sub("\n\n\n+", "\n\n", text)

last_sub_indent: Optional[int] = None
paragraphs: list[str] = []
Expand Down Expand Up @@ -157,7 +157,8 @@ def pathCompleter(self, text: str, state: int) -> str:
if os.path.isdir(text):
text += "/"

return list(glob.glob(f"{text}*"))[state]
# We explicitly use a list comprehension here instead of a call to the list constructor, as the constructor call would otherwise break the autocompletion functionality of paths.
return [x for x in glob.glob(f"{text}*")][state]

def createListCompleter(self, ll: list[str]) -> None:
"""
Expand Down
13 changes: 5 additions & 8 deletions ViroConstrictor/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import pathlib
import re
from typing import Any
from typing import Any, Callable

from rich.color import ANSI_COLOR_NAMES
from rich.default_styles import DEFAULT_STYLES
Expand Down Expand Up @@ -219,8 +219,9 @@ def print_jobstatistics_logmessage(msg: dict) -> None:
log.info(f"Job statistics:\n[yellow]{logmessage}[/yellow]")


logmessage_strings_info: dict[str, Any] = {
logmessage_strings_info: dict[str, Callable] = {
"Activating conda environment": ColorizeLogMessagePath,
"Activating singularity image": ColorizeLogMessagePath,
"Building DAG of jobs": BaseLogMessage,
"Creating conda environment": ColorizeLogMessagePath,
"Removing incomplete Conda environment": ColorizeLogMessagePath,
Expand Down Expand Up @@ -262,15 +263,11 @@ def log_handler(msg: dict) -> None:
loglevel = msg.get("level")
logmessage = msg.get("msg")

if loglevel == "dag_debug":
return None
if loglevel == "debug":
return None
if loglevel == "shellcmd":
if loglevel in ["dag_debug", "debug", "shellcmd"]:
return None

if logmessage is not None and any(
x for x in logmessage_suppressed_strings_warning if x in logmessage
x in logmessage for x in logmessage_suppressed_strings_warning
):
return None

Expand Down
Loading

0 comments on commit edf726c

Please sign in to comment.