From f9d07efc3014df6085d1455dffa19745b50c5667 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 5 Oct 2022 17:04:29 +0200 Subject: [PATCH 01/42] Add -t / --tower option to 'nf-core download'. --- nf_core/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 735eb99e04..725bcfc895 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -215,6 +215,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") +@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for sequeralabs® Nextflow Tower") @click.option( "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) @@ -223,7 +224,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all help="Don't / do copy images to the output directory and set 'singularity.cacheDir' in workflow", ) @click.option("-p", "--parallel-downloads", type=int, default=4, help="Number of parallel image downloads") -def download(pipeline, revision, outdir, compress, force, container, singularity_cache_only, parallel_downloads): +def download(pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads): """ Download a pipeline, nf-core/configs and pipeline singularity images. 
@@ -233,7 +234,7 @@ def download(pipeline, revision, outdir, compress, force, container, singularity from nf_core.download import DownloadWorkflow dl = DownloadWorkflow( - pipeline, revision, outdir, compress, force, container, singularity_cache_only, parallel_downloads + pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads ) dl.download_workflow() From e94dce06c071f0b9fc957ec76ad7fc64bb29630f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 5 Dec 2022 18:04:40 +0100 Subject: [PATCH 02/42] Intermediate commit --- nf_core/download.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nf_core/download.py b/nf_core/download.py index cd36c65c4a..8ecd2fd85c 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -73,6 +73,7 @@ class DownloadWorkflow: pipeline (str): A nf-core pipeline name. revision (str): The workflow revision to download, like `1.0`. Defaults to None. singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. + tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. 
""" @@ -83,6 +84,7 @@ def __init__( outdir=None, compress_type=None, force=False, + tower=False, container=None, singularity_cache_only=False, parallel_downloads=4, @@ -93,6 +95,7 @@ def __init__( self.output_filename = None self.compress_type = compress_type self.force = force + self.tower = tower self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads From cba336b4970635703736fd8bc20995c753d6253b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 16:04:41 +0100 Subject: [PATCH 03/42] Implement logic for the Tower download in DownloadWorkflow:download_workflow() --- nf_core/download.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8ecd2fd85c..c3451818e8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -135,7 +135,10 @@ def download_workflow(self): summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") # Set an output filename now that we have the outdir - if self.compress_type is not None: + if self.tower: + self.output_filename = f"{self.outdir}.git" + summary_log.append(f"Output file (Tower enabled): '{self.output_filename}'") + elif self.compress_type is not None: self.output_filename = f"{self.outdir}.{self.compress_type}" summary_log.append(f"Output file: '{self.output_filename}'") else: @@ -160,6 +163,13 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) + # Actually download the workflow + if not self.tower: + self.download_workflow_classic() + else: + self.download_workflow_tower() + + def download_workflow_classic(self): # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -188,6 +198,15 @@ def download_workflow(self): log.info("Compressing download..") self.compress_download() + def 
download_workflow_tower(self): + # Create a bare-cloned git repository of the workflow that includes the configs + log.info("Cloning workflow files from GitHub") + self.clone_wf_files() + + # Download the centralised configs + log.info("Downloading centralised configs from GitHub") + self.download_configs() + def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" From 15c0588a88ac50b98c89d12dbe8ed40bff44a369 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 18:40:22 +0100 Subject: [PATCH 04/42] Extend ModulesRepo:setup_local_repo() with a cache_only bool, so we can use ModuleRepo as superclass to the new WorkflowRepo. --- nf_core/modules/modules_repo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 5f77148867..0f5db4bc52 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -11,7 +11,7 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils -from nf_core.utils import NFCORE_DIR, load_tools_config +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config log = logging.getLogger(__name__) @@ -166,7 +166,7 @@ def verify_sha(self, prompt, sha): return True - def setup_local_repo(self, remote, branch, hide_progress=True): + def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -177,7 +177,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True): branch (str): name of branch to use Sets self.repo """ - self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) + self.local_repo_dir = os.path.join(NFCORE_DIR if not cache_only else NFCORE_CACHE_DIR, self.fullname) try: if not os.path.exists(self.local_repo_dir): try: From 2be88c4d475f4388a3ab360dcb5ec8ddfb43ffb0 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 20:11:22 +0100 Subject: [PATCH 05/42] Create WorkflowRepo subclass of ModuleRepo and initialise local clone. --- nf_core/__main__.py | 2 +- nf_core/download.py | 41 +++++++++++++++++++++++++++++---- nf_core/modules/modules_repo.py | 4 ++-- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 725bcfc895..521454eb99 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -215,7 +215,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") -@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for sequeralabs® Nextflow Tower") +@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for seqeralabs® Nextflow Tower") @click.option( "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) diff --git a/nf_core/download.py b/nf_core/download.py index c3451818e8..5c2ff1607d 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,6 +3,7 @@ from __future__ import print_function import concurrent.futures +from git import Repo import io import logging import os @@ -23,6 +24,7 @@ import nf_core import nf_core.list import nf_core.utils +from 
nf_core.modules import ModulesRepo # to create subclass WorkflowRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -137,13 +139,15 @@ def download_workflow(self): # Set an output filename now that we have the outdir if self.tower: self.output_filename = f"{self.outdir}.git" - summary_log.append(f"Output file (Tower enabled): '{self.output_filename}'") + summary_log.append(f"Output file: '{self.output_filename}'") elif self.compress_type is not None: self.output_filename = f"{self.outdir}.{self.compress_type}" summary_log.append(f"Output file: '{self.output_filename}'") else: summary_log.append(f"Output directory: '{self.outdir}'") + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") + # Check that the outdir doesn't already exist if os.path.exists(self.outdir): if not self.force: @@ -170,6 +174,7 @@ def download_workflow(self): self.download_workflow_tower() def download_workflow_classic(self): + """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -199,9 +204,10 @@ def download_workflow_classic(self): self.compress_download() def download_workflow_tower(self): - # Create a bare-cloned git repository of the workflow that includes the configs + """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Cloning workflow files from GitHub") - self.clone_wf_files() + + self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) # Download the centralised configs log.info("Downloading centralised configs from GitHub") @@ -816,5 +822,32 @@ def compress_download(self): log.debug(f"Deleting uncompressed files: '{self.outdir}'") shutil.rmtree(self.outdir) - # Caclualte md5sum for output file + # Calculate md5sum for output 
file log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") + + +class WorkflowRepo(ModulesRepo): + """ + An object to store details about a locally cached workflow repository. + + Important Attributes: + fullname: The full name of the repository, ``nf-core/{self.pipelinename}``. + local_repo_dir (str): The local directory, where the workflow is cloned into. Defaults to ``$HOME/.cache/nf-core/nf-core/{self.pipeline}``. + + """ + + def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False, in_cache=True): + """ + Initializes the object and clones the workflows git repository if it is not already present + + Args: + remote_url (str, optional): The URL of the remote repository. Defaults to None. + branch (str, optional): The branch to clone. Defaults to None. + no_pull (bool, optional): Whether to skip the pull step. Defaults to False. + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. + """ + self.remote_url = remote_url + self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + + self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 0f5db4bc52..23f62bdee2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -166,7 +166,7 @@ def verify_sha(self, prompt, sha): return True - def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False): + def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -177,7 +177,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False) branch (str): name of branch to use Sets self.repo """ - self.local_repo_dir = os.path.join(NFCORE_DIR if not cache_only else NFCORE_CACHE_DIR, self.fullname) + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) try: if not os.path.exists(self.local_repo_dir): try: From d72169df6ed25c2f80a8c1f2ed2872242a060332 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 23:03:02 +0100 Subject: [PATCH 06/42] TypeError: HEAD is a detached symbolic reference as it points to ... --- nf_core/download.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 5c2ff1607d..74b8abf3d8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -205,13 +205,13 @@ def download_workflow_classic(self): def download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" - log.info("Cloning workflow files from GitHub") + log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) + import pbb - # Download the centralised configs + pdb.set_trace() log.info("Downloading centralised configs from GitHub") - self.download_configs() def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -851,3 +851,14 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) + + @property + def active_branch(self): + """ + In ModuleRepo.setup_local_repo(), 
self.repo.active_branch.tracking_branch() is called in line 227. + For a WorkflowRepo, this raises a TypeError ``HEAD is a detached symbolic reference as it points to {commit hash}`` + + This property shadows the call and seemed the cleanest solution to prevent excessive code duplication. + Otherwise, I would have needed to define a setup_local_repo() method for the WorkflowRepo class. + """ + pass # TODO From a6b34926c3c4661a50a3000f273c3c0fb78ceec1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:00 +0100 Subject: [PATCH 07/42] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/{modules/modules_repo.py => temp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nf_core/{modules/modules_repo.py => temp} (100%) diff --git a/nf_core/modules/modules_repo.py b/nf_core/temp similarity index 100% rename from nf_core/modules/modules_repo.py rename to nf_core/temp From 8984a6391fd0794e107d7696d3eedfd84e8f5918 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:00 +0100 Subject: [PATCH 08/42] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/synced_repo.py | 498 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 nf_core/synced_repo.py diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py new file mode 100644 index 0000000000..23f62bdee2 --- /dev/null +++ b/nf_core/synced_repo.py @@ -0,0 +1,498 @@ +import filecmp +import logging +import os +import shutil +from pathlib import Path + +import git +import rich +import rich.progress +from git.exc import GitCommandError, InvalidGitRepositoryError + +import nf_core.modules.modules_json +import nf_core.modules.modules_utils +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config + +log = logging.getLogger(__name__) + +# Constants for the nf-core/modules repo used throughout the module files +NF_CORE_MODULES_NAME = "nf-core" +NF_CORE_MODULES_REMOTE = 
"https://github.com/nf-core/modules.git" +NF_CORE_MODULES_DEFAULT_BRANCH = "master" + + +class RemoteProgressbar(git.RemoteProgress): + """ + An object to create a progressbar for when doing an operation with the remote. + Note that an initialized rich Progress (progress bar) object must be past + during initialization. + """ + + def __init__(self, progress_bar, repo_name, remote_url, operation): + """ + Initializes the object and adds a task to the progressbar passed as 'progress_bar' + + Args: + progress_bar (rich.progress.Progress): A rich progress bar object + repo_name (str): Name of the repository the operation is performed on + remote_url (str): Git URL of the repository the operation is performed on + operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc. + """ + super().__init__() + self.progress_bar = progress_bar + self.tid = self.progress_bar.add_task( + f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])", + start=False, + state="Waiting for response", + ) + + def update(self, op_code, cur_count, max_count=None, message=""): + """ + Overrides git.RemoteProgress.update. + Called every time there is a change in the remote operation + """ + if not self.progress_bar.tasks[self.tid].started: + self.progress_bar.start_task(self.tid) + self.progress_bar.update( + self.tid, total=max_count, completed=cur_count, state=f"{cur_count / max_count * 100:.1f}%" + ) + + +class ModulesRepo: + """ + An object to store details about the repository being used for modules. + + Used by the `nf-core modules` top-level command with -r and -b flags, + so that this can be used in the same way by all sub-commands. + + We keep track of the pull-status of the different installed repos in + the static variable local_repo_status. This is so we don't need to + pull a remote several times in one command. 
+ """ + + local_repo_statuses = {} + no_pull_global = False + + @staticmethod + def local_repo_synced(repo_name): + """ + Checks whether a local repo has been cloned/pull in the current session + """ + return ModulesRepo.local_repo_statuses.get(repo_name, False) + + @staticmethod + def update_local_repo_status(repo_name, up_to_date): + """ + Updates the clone/pull status of a local repo + """ + ModulesRepo.local_repo_statuses[repo_name] = up_to_date + + @staticmethod + def get_remote_branches(remote_url): + """ + Get all branches from a remote repository + + Args: + remote_url (str): The git url to the remote repository + + Returns: + (set[str]): All branches found in the remote + """ + try: + unparsed_branches = git.Git().ls_remote(remote_url) + except git.GitCommandError: + raise LookupError(f"Was unable to fetch branches from '{remote_url}'") + else: + branches = {} + for branch_info in unparsed_branches.split("\n"): + sha, name = branch_info.split("\t") + if name != "HEAD": + # The remote branches are shown as 'ref/head/branch' + branch_name = Path(name).stem + branches[sha] = branch_name + return set(branches.values()) + + def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False): + """ + Initializes the object and clones the git repository if it is not already present + """ + + # This allows us to set this one time and then keep track of the user's choice + ModulesRepo.no_pull_global |= no_pull + + # Check if the remote seems to be well formed + if remote_url is None: + remote_url = NF_CORE_MODULES_REMOTE + + self.remote_url = remote_url + + self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + + self.setup_local_repo(remote_url, branch, hide_progress) + + config_fn, repo_config = load_tools_config(self.local_repo_dir) + try: + self.repo_path = repo_config["org_path"] + except KeyError: + raise UserWarning(f"'org_path' key not present in {config_fn.name}") + + # Verify that the repo seems to be 
correctly configured + if self.repo_path != NF_CORE_MODULES_NAME or self.branch: + self.verify_branch() + + # Convenience variable + self.modules_dir = os.path.join(self.local_repo_dir, "modules", self.repo_path) + self.subworkflows_dir = os.path.join(self.local_repo_dir, "subworkflows", self.repo_path) + + self.avail_module_names = None + + def verify_sha(self, prompt, sha): + """ + Verify that 'sha' and 'prompt' arguments are not provided together. + Verify that the provided SHA exists in the repo. + + Arguments: + prompt (bool): prompt asking for SHA + sha (str): provided sha + """ + if prompt and sha is not None: + log.error("Cannot use '--sha' and '--prompt' at the same time!") + return False + + if sha: + if not self.sha_exists_on_branch(sha): + log.error(f"Commit SHA '{sha}' doesn't exist in '{self.remote_url}'") + return False + + return True + + def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. 
+ + Args: + remote (str): git url of remote + branch (str): name of branch to use + Sets self.repo + """ + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) + try: + if not os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + except GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + else: + self.repo = git.Repo(self.local_repo_dir) + + if ModulesRepo.no_pull_global: + ModulesRepo.update_local_repo_status(self.fullname, True) + # If the repo is already cloned, fetch the latest changes from the remote + if not ModulesRepo.local_repo_synced(self.fullname): + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo.remotes.origin.fetch( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + + # Before verifying the branch, fetch the changes + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + + # Now merge the changes + tracking_branch = self.repo.active_branch.tracking_branch() + if tracking_branch is None: + raise LookupError(f"There is no remote tracking branch '{self.branch}' in 
'{self.remote_url}'") + self.repo.git.merge(tracking_branch.name) + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): + log.info(f"Removing '{self.local_repo_dir}'") + shutil.rmtree(self.local_repo_dir) + self.setup_local_repo(remote, branch, hide_progress) + else: + raise LookupError("Exiting due to error with local modules git repo") + + def setup_branch(self, branch): + """ + Verify that we have a branch and otherwise use the default one. + The branch is then checked out to verify that it exists in the repo. + + Args: + branch (str): Name of branch + """ + if branch is None: + # Don't bother fetching default branch if we're using nf-core + if self.remote_url == NF_CORE_MODULES_REMOTE: + self.branch = "master" + else: + self.branch = self.get_default_branch() + else: + self.branch = branch + + # Verify that the branch exists by checking it out + self.branch_exists() + + def get_default_branch(self): + """ + Gets the default branch for the repo (the branch origin/HEAD is pointing to) + """ + origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD") + _, branch = origin_head.ref.name.split("/") + return branch + + def branch_exists(self): + """ + Verifies that the branch exists in the repository by trying to check it out + """ + try: + self.checkout_branch() + except GitCommandError: + raise LookupError(f"Branch '{self.branch}' not found in '{self.remote_url}'") + + def verify_branch(self): + """ + Verifies the active branch conforms do the correct directory structure + """ + dir_names = os.listdir(self.local_repo_dir) + if "modules" not in dir_names: + err_str = f"Repository '{self.remote_url}' ({self.branch}) does not contain the 'modules/' directory" + if "software" in dir_names: + err_str += ( + ".\nAs of nf-core/tools version 2.0, the 'software/' 
directory should be renamed to 'modules/'" + ) + raise LookupError(err_str) + + def checkout_branch(self): + """ + Checks out the specified branch of the repository + """ + self.repo.git.checkout(self.branch) + + def checkout(self, commit): + """ + Checks out the repository at the requested commit + + Args: + commit (str): Git SHA of the commit + """ + self.repo.git.checkout(commit) + + def component_exists(self, component_name, component_type, checkout=True, commit=None): + """ + Check if a module/subworkflow exists in the branch of the repo + + Args: + component_name (str): The name of the module/subworkflow + + Returns: + (bool): Whether the module/subworkflow exists in this branch of the repository + """ + return component_name in self.get_avail_components(component_type, checkout=checkout, commit=commit) + + def get_component_dir(self, component_name, component_type): + """ + Returns the file path of a module/subworkflow directory in the repo. + Does not verify that the path exists. 
+ Args: + component_name (str): The name of the module/subworkflow + + Returns: + component_path (str): The path of the module/subworkflow in the local copy of the repository + """ + if component_type == "modules": + return os.path.join(self.modules_dir, component_name) + elif component_type == "subworkflows": + return os.path.join(self.subworkflows_dir, component_name) + + def install_component(self, component_name, install_dir, commit, component_type): + """ + Install the module/subworkflow files into a pipeline at the given commit + + Args: + component_name (str): The name of the module/subworkflow + install_dir (str): The path where the module/subworkflow should be installed + commit (str): The git SHA for the version of the module/subworkflow to be installed + + Returns: + (bool): Whether the operation was successful or not + """ + # Check out the repository at the requested ref + try: + self.checkout(commit) + except git.GitCommandError: + return False + + # Check if the module/subworkflow exists in the branch + if not self.component_exists(component_name, component_type, checkout=False): + log.error( + f"The requested {component_type[:-1]} does not exists in the branch '{self.branch}' of {self.remote_url}'" + ) + return False + + # Copy the files from the repo to the install folder + shutil.copytree(self.get_component_dir(component_name, component_type), Path(install_dir, component_name)) + + # Switch back to the tip of the branch + self.checkout_branch() + return True + + def module_files_identical(self, module_name, base_path, commit): + """ + Checks whether the module files in a pipeline are identical to the ones in the remote + Args: + module_name (str): The name of the module + base_path (str): The path to the module in the pipeline + + Returns: + (bool): Whether the pipeline files are identical to the repo files + """ + if commit is None: + self.checkout_branch() + else: + self.checkout(commit) + module_files = ["main.nf", "meta.yml"] + files_identical 
= {file: True for file in module_files} + module_dir = self.get_component_dir(module_name, "modules") + for file in module_files: + try: + files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) + except FileNotFoundError: + log.debug(f"Could not open file: {os.path.join(module_dir, file)}") + continue + self.checkout_branch() + return files_identical + + def get_component_git_log(self, component_name, component_type, depth=None): + """ + Fetches the commit history the of requested module/subworkflow since a given date. The default value is + not arbitrary - it is the last time the structure of the nf-core/modules repository was had an + update breaking backwards compatibility. + Args: + component_name (str): Name of module/subworkflow + modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question + + Returns: + ( dict ): Iterator of commit SHAs and associated (truncated) message + """ + self.checkout_branch() + component_path = os.path.join(component_type, self.repo_path, component_name) + commits_new = self.repo.iter_commits(max_count=depth, paths=component_path) + commits_new = [ + {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_new + ] + commits_old = [] + if component_type == "modules": + # Grab commits also from previous modules structure + component_path = os.path.join("modules", component_name) + commits_old = self.repo.iter_commits(max_count=depth, paths=component_path) + commits_old = [ + {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_old + ] + commits = iter(commits_new + commits_old) + return commits + + def get_latest_component_version(self, component_name, component_type): + """ + Returns the latest commit in the repository + """ + return list(self.get_component_git_log(component_name, component_type, depth=1))[0]["git_sha"] + + def sha_exists_on_branch(self, sha): + """ + 
Verifies that a given commit sha exists on the branch + """ + self.checkout_branch() + return sha in (commit.hexsha for commit in self.repo.iter_commits()) + + def get_commit_info(self, sha): + """ + Fetches metadata about the commit (dates, message, etc.) + Args: + commit_sha (str): The SHA of the requested commit + Returns: + message (str): The commit message for the requested commit + date (str): The commit date for the requested commit + Raises: + LookupError: If the search for the commit fails + """ + self.checkout_branch() + for commit in self.repo.iter_commits(): + if commit.hexsha == sha: + message = commit.message.partition("\n")[0] + date_obj = commit.committed_datetime + date = str(date_obj.date()) + return message, date + raise LookupError(f"Commit '{sha}' not found in the '{self.remote_url}'") + + def get_avail_components(self, component_type, checkout=True, commit=None): + """ + Gets the names of the modules/subworkflows in the repository. They are detected by + checking which directories have a 'main.nf' file + + Returns: + ([ str ]): The module/subworkflow names + """ + if checkout: + self.checkout_branch() + if commit is not None: + self.checkout(commit) + # Get directory + if component_type == "modules": + directory = self.modules_dir + elif component_type == "subworkflows": + directory = self.subworkflows_dir + # Module/Subworkflow directories are characterized by having a 'main.nf' file + avail_component_names = [ + os.path.relpath(dirpath, start=directory) + for dirpath, _, file_names in os.walk(directory) + if "main.nf" in file_names + ] + return avail_component_names + + def get_meta_yml(self, component_type, module_name): + """ + Returns the contents of the 'meta.yml' file of a module + + Args: + module_name (str): The name of the module + + Returns: + (str): The contents of the file in text format + """ + self.checkout_branch() + if component_type == "modules": + path = Path(self.modules_dir, module_name, "meta.yml") + elif component_type 
== "subworkflows": + path = Path(self.subworkflows_dir, module_name, "meta.yml") + else: + raise ValueError(f"Invalid component type: {component_type}") + if not path.exists(): + return None + with open(path) as fh: + contents = fh.read() + return contents From 33d03812201ae59047fa8cf48ed06a5e1d055846 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:01 +0100 Subject: [PATCH 09/42] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/{temp => modules/modules_repo.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nf_core/{temp => modules/modules_repo.py} (100%) diff --git a/nf_core/temp b/nf_core/modules/modules_repo.py similarity index 100% rename from nf_core/temp rename to nf_core/modules/modules_repo.py From caef187c811f5b28e5b326adb59c012fad726ae5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:53:19 +0100 Subject: [PATCH 10/42] Duplication of ModulesRepo to SyncedRepo done. --- nf_core/download.py | 15 ++------------- nf_core/modules/modules_repo.py | 3 ++- nf_core/synced_repo.py | 31 +++++++++++-------------------- 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 74b8abf3d8..e92e50164f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -24,7 +24,7 @@ import nf_core import nf_core.list import nf_core.utils -from nf_core.modules import ModulesRepo # to create subclass WorkflowRepo +from nf_core.synced_repo import SyncedRepo # to create subclass WorkflowRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -826,7 +826,7 @@ def compress_download(self): log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") -class WorkflowRepo(ModulesRepo): +class WorkflowRepo(SyncedRepo): """ An object to store details about a locally cached workflow repository. 
@@ -851,14 +851,3 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) - - @property - def active_branch(self): - """ - In ModuleRepo.setup_local_repo(), self.repo.active_branch.tracking_branch() is called in line 227. - For a WorkflowRepo, this raises a TypeError ``HEAD is a detached symbolic reference as it points to {commit hash}`` - - This property shadows the call and seemed the cleanest solution to prevent excessive code duplication. - Otherwise, I would have needed to define a setup_local_repo() method for the WorkflowRepo class. - """ - pass # TODO diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 23f62bdee2..5e4d80be16 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -12,6 +12,7 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config +from nf_core.synced_repo import SyncedRepo log = logging.getLogger(__name__) @@ -58,7 +59,7 @@ def update(self, op_code, cur_count, max_count=None, message=""): ) -class ModulesRepo: +class ModulesRepo(SyncedRepo): """ An object to store details about the repository being used for modules. 
diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 23f62bdee2..89d2f894b1 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -9,8 +9,6 @@ import rich.progress from git.exc import GitCommandError, InvalidGitRepositoryError -import nf_core.modules.modules_json -import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config log = logging.getLogger(__name__) @@ -58,16 +56,9 @@ def update(self, op_code, cur_count, max_count=None, message=""): ) -class ModulesRepo: +class SyncedRepo: """ - An object to store details about the repository being used for modules. - - Used by the `nf-core modules` top-level command with -r and -b flags, - so that this can be used in the same way by all sub-commands. - - We keep track of the pull-status of the different installed repos in - the static variable local_repo_status. This is so we don't need to - pull a remote several times in one command. + An object to store details about a locally cached code repository. 
""" local_repo_statuses = {} @@ -78,14 +69,14 @@ def local_repo_synced(repo_name): """ Checks whether a local repo has been cloned/pull in the current session """ - return ModulesRepo.local_repo_statuses.get(repo_name, False) + return SyncedRepo.local_repo_statuses.get(repo_name, False) @staticmethod def update_local_repo_status(repo_name, up_to_date): """ Updates the clone/pull status of a local repo """ - ModulesRepo.local_repo_statuses[repo_name] = up_to_date + SyncedRepo.local_repo_statuses[repo_name] = up_to_date @staticmethod def get_remote_branches(remote_url): @@ -118,7 +109,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa """ # This allows us to set this one time and then keep track of the user's choice - ModulesRepo.no_pull_global |= no_pull + SyncedRepo.no_pull_global |= no_pull # Check if the remote seems to be well formed if remote_url is None: @@ -194,7 +185,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.local_repo_dir, progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), ) - ModulesRepo.update_local_repo_status(self.fullname, True) + SyncedRepo.update_local_repo_status(self.fullname, True) except GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") # Verify that the requested branch exists by checking it out @@ -202,10 +193,10 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): else: self.repo = git.Repo(self.local_repo_dir) - if ModulesRepo.no_pull_global: - ModulesRepo.update_local_repo_status(self.fullname, True) + if SyncedRepo.no_pull_global: + SyncedRepo.update_local_repo_status(self.fullname, True) # If the repo is already cloned, fetch the latest changes from the remote - if not ModulesRepo.local_repo_synced(self.fullname): + if not SyncedRepo.local_repo_synced(self.fullname): pbar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), 
@@ -217,7 +208,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.repo.remotes.origin.fetch( progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") ) - ModulesRepo.update_local_repo_status(self.fullname, True) + SyncedRepo.update_local_repo_status(self.fullname, True) # Before verifying the branch, fetch the changes # Verify that the requested branch exists by checking it out @@ -394,7 +385,7 @@ def get_component_git_log(self, component_name, component_type, depth=None): update breaking backwards compatibility. Args: component_name (str): Name of module/subworkflow - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question + modules_repo (SyncedRepo): A SyncedRepo object configured for the repository in question Returns: ( dict ): Iterator of commit SHAs and associated (truncated) message From 2367ae736e1e7de54f636f576200c5933a425606 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:59:03 +0100 Subject: [PATCH 11/42] Strip ModulesRepo class of the methods moved to new superclass. --- nf_core/modules/modules_repo.py | 358 +------------------------------- 1 file changed, 1 insertion(+), 357 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 5e4d80be16..20d581af84 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -12,7 +12,7 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config -from nf_core.synced_repo import SyncedRepo +from nf_core.synced_repo import RemoteProgressbar, SyncedRepo log = logging.getLogger(__name__) @@ -22,43 +22,6 @@ NF_CORE_MODULES_DEFAULT_BRANCH = "master" -class RemoteProgressbar(git.RemoteProgress): - """ - An object to create a progressbar for when doing an operation with the remote. 
- Note that an initialized rich Progress (progress bar) object must be past - during initialization. - """ - - def __init__(self, progress_bar, repo_name, remote_url, operation): - """ - Initializes the object and adds a task to the progressbar passed as 'progress_bar' - - Args: - progress_bar (rich.progress.Progress): A rich progress bar object - repo_name (str): Name of the repository the operation is performed on - remote_url (str): Git URL of the repository the operation is performed on - operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc. - """ - super().__init__() - self.progress_bar = progress_bar - self.tid = self.progress_bar.add_task( - f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])", - start=False, - state="Waiting for response", - ) - - def update(self, op_code, cur_count, max_count=None, message=""): - """ - Overrides git.RemoteProgress.update. - Called every time there is a change in the remote operation - """ - if not self.progress_bar.tasks[self.tid].started: - self.progress_bar.start_task(self.tid) - self.progress_bar.update( - self.tid, total=max_count, completed=cur_count, state=f"{cur_count / max_count * 100:.1f}%" - ) - - class ModulesRepo(SyncedRepo): """ An object to store details about the repository being used for modules. 
@@ -74,45 +37,6 @@ class ModulesRepo(SyncedRepo): local_repo_statuses = {} no_pull_global = False - @staticmethod - def local_repo_synced(repo_name): - """ - Checks whether a local repo has been cloned/pull in the current session - """ - return ModulesRepo.local_repo_statuses.get(repo_name, False) - - @staticmethod - def update_local_repo_status(repo_name, up_to_date): - """ - Updates the clone/pull status of a local repo - """ - ModulesRepo.local_repo_statuses[repo_name] = up_to_date - - @staticmethod - def get_remote_branches(remote_url): - """ - Get all branches from a remote repository - - Args: - remote_url (str): The git url to the remote repository - - Returns: - (set[str]): All branches found in the remote - """ - try: - unparsed_branches = git.Git().ls_remote(remote_url) - except git.GitCommandError: - raise LookupError(f"Was unable to fetch branches from '{remote_url}'") - else: - branches = {} - for branch_info in unparsed_branches.split("\n"): - sha, name = branch_info.split("\t") - if name != "HEAD": - # The remote branches are shown as 'ref/head/branch' - branch_name = Path(name).stem - branches[sha] = branch_name - return set(branches.values()) - def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False): """ Initializes the object and clones the git repository if it is not already present @@ -147,26 +71,6 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.avail_module_names = None - def verify_sha(self, prompt, sha): - """ - Verify that 'sha' and 'prompt' arguments are not provided together. - Verify that the provided SHA exists in the repo. 
- - Arguments: - prompt (bool): prompt asking for SHA - sha (str): provided sha - """ - if prompt and sha is not None: - log.error("Cannot use '--sha' and '--prompt' at the same time!") - return False - - if sha: - if not self.sha_exists_on_branch(sha): - log.error(f"Commit SHA '{sha}' doesn't exist in '{self.remote_url}'") - return False - - return True - def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): """ Sets up the local git repository. If the repository has been cloned previously, it @@ -237,263 +141,3 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.setup_local_repo(remote, branch, hide_progress) else: raise LookupError("Exiting due to error with local modules git repo") - - def setup_branch(self, branch): - """ - Verify that we have a branch and otherwise use the default one. - The branch is then checked out to verify that it exists in the repo. - - Args: - branch (str): Name of branch - """ - if branch is None: - # Don't bother fetching default branch if we're using nf-core - if self.remote_url == NF_CORE_MODULES_REMOTE: - self.branch = "master" - else: - self.branch = self.get_default_branch() - else: - self.branch = branch - - # Verify that the branch exists by checking it out - self.branch_exists() - - def get_default_branch(self): - """ - Gets the default branch for the repo (the branch origin/HEAD is pointing to) - """ - origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD") - _, branch = origin_head.ref.name.split("/") - return branch - - def branch_exists(self): - """ - Verifies that the branch exists in the repository by trying to check it out - """ - try: - self.checkout_branch() - except GitCommandError: - raise LookupError(f"Branch '{self.branch}' not found in '{self.remote_url}'") - - def verify_branch(self): - """ - Verifies the active branch conforms do the correct directory structure - """ - dir_names = os.listdir(self.local_repo_dir) - if "modules" 
not in dir_names: - err_str = f"Repository '{self.remote_url}' ({self.branch}) does not contain the 'modules/' directory" - if "software" in dir_names: - err_str += ( - ".\nAs of nf-core/tools version 2.0, the 'software/' directory should be renamed to 'modules/'" - ) - raise LookupError(err_str) - - def checkout_branch(self): - """ - Checks out the specified branch of the repository - """ - self.repo.git.checkout(self.branch) - - def checkout(self, commit): - """ - Checks out the repository at the requested commit - - Args: - commit (str): Git SHA of the commit - """ - self.repo.git.checkout(commit) - - def component_exists(self, component_name, component_type, checkout=True, commit=None): - """ - Check if a module/subworkflow exists in the branch of the repo - - Args: - component_name (str): The name of the module/subworkflow - - Returns: - (bool): Whether the module/subworkflow exists in this branch of the repository - """ - return component_name in self.get_avail_components(component_type, checkout=checkout, commit=commit) - - def get_component_dir(self, component_name, component_type): - """ - Returns the file path of a module/subworkflow directory in the repo. - Does not verify that the path exists. 
- Args: - component_name (str): The name of the module/subworkflow - - Returns: - component_path (str): The path of the module/subworkflow in the local copy of the repository - """ - if component_type == "modules": - return os.path.join(self.modules_dir, component_name) - elif component_type == "subworkflows": - return os.path.join(self.subworkflows_dir, component_name) - - def install_component(self, component_name, install_dir, commit, component_type): - """ - Install the module/subworkflow files into a pipeline at the given commit - - Args: - component_name (str): The name of the module/subworkflow - install_dir (str): The path where the module/subworkflow should be installed - commit (str): The git SHA for the version of the module/subworkflow to be installed - - Returns: - (bool): Whether the operation was successful or not - """ - # Check out the repository at the requested ref - try: - self.checkout(commit) - except git.GitCommandError: - return False - - # Check if the module/subworkflow exists in the branch - if not self.component_exists(component_name, component_type, checkout=False): - log.error( - f"The requested {component_type[:-1]} does not exists in the branch '{self.branch}' of {self.remote_url}'" - ) - return False - - # Copy the files from the repo to the install folder - shutil.copytree(self.get_component_dir(component_name, component_type), Path(install_dir, component_name)) - - # Switch back to the tip of the branch - self.checkout_branch() - return True - - def module_files_identical(self, module_name, base_path, commit): - """ - Checks whether the module files in a pipeline are identical to the ones in the remote - Args: - module_name (str): The name of the module - base_path (str): The path to the module in the pipeline - - Returns: - (bool): Whether the pipeline files are identical to the repo files - """ - if commit is None: - self.checkout_branch() - else: - self.checkout(commit) - module_files = ["main.nf", "meta.yml"] - files_identical 
= {file: True for file in module_files} - module_dir = self.get_component_dir(module_name, "modules") - for file in module_files: - try: - files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) - except FileNotFoundError: - log.debug(f"Could not open file: {os.path.join(module_dir, file)}") - continue - self.checkout_branch() - return files_identical - - def get_component_git_log(self, component_name, component_type, depth=None): - """ - Fetches the commit history the of requested module/subworkflow since a given date. The default value is - not arbitrary - it is the last time the structure of the nf-core/modules repository was had an - update breaking backwards compatibility. - Args: - component_name (str): Name of module/subworkflow - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question - - Returns: - ( dict ): Iterator of commit SHAs and associated (truncated) message - """ - self.checkout_branch() - component_path = os.path.join(component_type, self.repo_path, component_name) - commits_new = self.repo.iter_commits(max_count=depth, paths=component_path) - commits_new = [ - {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_new - ] - commits_old = [] - if component_type == "modules": - # Grab commits also from previous modules structure - component_path = os.path.join("modules", component_name) - commits_old = self.repo.iter_commits(max_count=depth, paths=component_path) - commits_old = [ - {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_old - ] - commits = iter(commits_new + commits_old) - return commits - - def get_latest_component_version(self, component_name, component_type): - """ - Returns the latest commit in the repository - """ - return list(self.get_component_git_log(component_name, component_type, depth=1))[0]["git_sha"] - - def sha_exists_on_branch(self, sha): - """ - 
Verifies that a given commit sha exists on the branch - """ - self.checkout_branch() - return sha in (commit.hexsha for commit in self.repo.iter_commits()) - - def get_commit_info(self, sha): - """ - Fetches metadata about the commit (dates, message, etc.) - Args: - commit_sha (str): The SHA of the requested commit - Returns: - message (str): The commit message for the requested commit - date (str): The commit date for the requested commit - Raises: - LookupError: If the search for the commit fails - """ - self.checkout_branch() - for commit in self.repo.iter_commits(): - if commit.hexsha == sha: - message = commit.message.partition("\n")[0] - date_obj = commit.committed_datetime - date = str(date_obj.date()) - return message, date - raise LookupError(f"Commit '{sha}' not found in the '{self.remote_url}'") - - def get_avail_components(self, component_type, checkout=True, commit=None): - """ - Gets the names of the modules/subworkflows in the repository. They are detected by - checking which directories have a 'main.nf' file - - Returns: - ([ str ]): The module/subworkflow names - """ - if checkout: - self.checkout_branch() - if commit is not None: - self.checkout(commit) - # Get directory - if component_type == "modules": - directory = self.modules_dir - elif component_type == "subworkflows": - directory = self.subworkflows_dir - # Module/Subworkflow directories are characterized by having a 'main.nf' file - avail_component_names = [ - os.path.relpath(dirpath, start=directory) - for dirpath, _, file_names in os.walk(directory) - if "main.nf" in file_names - ] - return avail_component_names - - def get_meta_yml(self, component_type, module_name): - """ - Returns the contents of the 'meta.yml' file of a module - - Args: - module_name (str): The name of the module - - Returns: - (str): The contents of the file in text format - """ - self.checkout_branch() - if component_type == "modules": - path = Path(self.modules_dir, module_name, "meta.yml") - elif component_type 
== "subworkflows": - path = Path(self.subworkflows_dir, module_name, "meta.yml") - else: - raise ValueError(f"Invalid component type: {component_type}") - if not path.exists(): - return None - with open(path) as fh: - contents = fh.read() - return contents From f5f0df2327fb524c5b2f519a52b5ef52e28a26c1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 17:18:38 +0100 Subject: [PATCH 12/42] Rebase to current dev. --- nf_core/synced_repo.py | 75 ++---------------------------------------- 1 file changed, 2 insertions(+), 73 deletions(-) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 89d2f894b1..715f6d77bb 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -7,9 +7,9 @@ import git import rich import rich.progress -from git.exc import GitCommandError, InvalidGitRepositoryError +from git.exc import GitCommandError -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config +from nf_core.utils import load_tools_config log = logging.getLogger(__name__) @@ -157,77 +157,6 @@ def verify_sha(self, prompt, sha): return True - def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): - """ - Sets up the local git repository. If the repository has been cloned previously, it - returns a git.Repo object of that clone. Otherwise it tries to clone the repository from - the provided remote URL and returns a git.Repo of the new clone. 
- - Args: - remote (str): git url of remote - branch (str): name of branch to use - Sets self.repo - """ - self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) - try: - if not os.path.exists(self.local_repo_dir): - try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.repo = git.Repo.clone_from( - remote, - self.local_repo_dir, - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), - ) - SyncedRepo.update_local_repo_status(self.fullname, True) - except GitCommandError: - raise LookupError(f"Failed to clone from the remote: `{remote}`") - # Verify that the requested branch exists by checking it out - self.setup_branch(branch) - else: - self.repo = git.Repo(self.local_repo_dir) - - if SyncedRepo.no_pull_global: - SyncedRepo.update_local_repo_status(self.fullname, True) - # If the repo is already cloned, fetch the latest changes from the remote - if not SyncedRepo.local_repo_synced(self.fullname): - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.repo.remotes.origin.fetch( - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") - ) - SyncedRepo.update_local_repo_status(self.fullname, True) - - # Before verifying the branch, fetch the changes - # Verify that the requested branch exists by checking it out - self.setup_branch(branch) - - # Now merge the changes - tracking_branch = self.repo.active_branch.tracking_branch() - if tracking_branch is None: - raise LookupError(f"There is no remote tracking branch '{self.branch}' in '{self.remote_url}'") 
- self.repo.git.merge(tracking_branch.name) - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): - log.info(f"Removing '{self.local_repo_dir}'") - shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(remote, branch, hide_progress) - else: - raise LookupError("Exiting due to error with local modules git repo") - def setup_branch(self, branch): """ Verify that we have a branch and otherwise use the default one. From f852159b7d38e863d419aaa2e21a04724c8cb885 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 21:40:40 +0100 Subject: [PATCH 13/42] Local caching of the repo works now. --- nf_core/download.py | 99 ++++++++++++++++++++++++++++++++++++------ nf_core/synced_repo.py | 2 +- 2 files changed, 86 insertions(+), 15 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e92e50164f..67c987bb44 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,7 +3,8 @@ from __future__ import print_function import concurrent.futures -from git import Repo +import git +from git.exc import GitCommandError, InvalidGitRepositoryError import io import logging import os @@ -24,7 +25,8 @@ import nf_core import nf_core.list import nf_core.utils -from nf_core.synced_repo import SyncedRepo # to create subclass WorkflowRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR +from nf_core.synced_repo import RemoteProgressbar, SyncedRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -127,7 +129,8 @@ def download_workflow(self): self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() - self.prompt_compression_type() + if not self.tower: + self.prompt_compression_type() except AssertionError as e: log.critical(e) sys.exit(1) @@ -207,10 +210,10 @@ def 
download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") - self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) - import pbb + self.workflow_repo = WorkflowRepo( + remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha + ) - pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -625,12 +628,12 @@ def singularity_image_filenames(self, container): """Check Singularity cache for image, copy to destination folder if found. Args: - container (str): A pipeline's container name. Can be direct download URL - or a Docker Hub repository ID. + container (str): A pipeline's container name. Can be direct download URL + or a Docker Hub repository ID. Returns: - results (bool, str): Returns True if we have the image in the target location. - Returns a download path if not. + results (bool, str): Returns True if we have the image in the target location. + Returns a download path if not. """ # Generate file paths @@ -836,18 +839,86 @@ class WorkflowRepo(SyncedRepo): """ - def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False, in_cache=True): + def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=False, in_cache=True): """ Initializes the object and clones the workflows git repository if it is not already present Args: - remote_url (str, optional): The URL of the remote repository. Defaults to None. - branch (str, optional): The branch to clone. Defaults to None. + remote_url (str): The URL of the remote repository. Defaults to None. + commit (str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. 
hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. """ self.remote_url = remote_url + self.revision = revision + self.commit = commit self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + self.setup_local_repo(remote_url, revision, commit, hide_progress, in_cache=in_cache) + + def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cache=True): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. + + Args: + remote (str): git url of remote + commit (str): The git SHA to check out + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to True. 
+ Sets self.repo + """ + + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) + try: + if not os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + super().update_local_repo_status(self.fullname, True) + except GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") + else: + self.repo = git.Repo(self.local_repo_dir) + + if super().no_pull_global: + super().update_local_repo_status(self.fullname, True) + # If the repo is already cloned, fetch the latest changes from the remote + if not super().local_repo_synced(self.fullname): + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo.remotes.origin.fetch( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + super().update_local_repo_status(self.fullname, True) + + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): + log.info(f"Removing '{self.local_repo_dir}'") + shutil.rmtree(self.local_repo_dir) + self.setup_local_repo(remote, revision, commit, hide_progress) + else: + raise LookupError("Exiting due to error with local modules git repo") + + finally: + self.repo.git.checkout(commit) diff --git 
a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 715f6d77bb..f04ef8e0c7 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -9,7 +9,7 @@ import rich.progress from git.exc import GitCommandError -from nf_core.utils import load_tools_config +from nf_core.utils import load_tools_config log = logging.getLogger(__name__) From af4754e05d02dc1239861fb8819ab8c0c6f61f9f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 Feb 2023 20:13:07 +0100 Subject: [PATCH 14/42] Started implementing the config download. --- nf_core/download.py | 70 +++++++++++++++++++++++++++++++++++++----- nf_core/synced_repo.py | 2 +- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 67c987bb44..0d33b601f0 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -100,6 +100,7 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower + self.include_config self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads @@ -210,10 +211,13 @@ def download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") + self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha ) + import pdb + pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -374,6 +378,12 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None + def prompt_config_inclusion(self): + """Prompt for inclusion of institutional configurations""" + self.include_configs = questionary.confirm( + "Include the institutional configuration files into the download?" 
+ ).ask() + def download_wf_files(self): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" log.debug(f"Downloading {self.wf_download_url}") @@ -853,11 +863,24 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa self.remote_url = remote_url self.revision = revision self.commit = commit + self.hide_progress = hide_progress self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, revision, commit, hide_progress, in_cache=in_cache) + self.setup_local_repo(remote_url, commit, hide_progress, in_cache=in_cache) + + def __repr__(self): + """Called by print, creates representation of object""" + return f"" + + def retry_setup_local_repo(self): + if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): + log.info(f"Removing '{self.local_repo_dir}'") + shutil.rmtree(self.local_repo_dir) + self.setup_local_repo(self.remote, self.commit, self.hide_progress) + else: + raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cache=True): + def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -913,12 +936,43 @@ def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cach except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): - log.info(f"Removing '{self.local_repo_dir}'") - shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(remote, revision, commit, hide_progress) - else: - raise LookupError("Exiting due to error with local modules git repo") + self.retry_setup_local_repo() + finally: + self.repo.git.checkout(commit) + + def add_nfcore_configs(self, commit, hide_progress=False): + """ + Pulls the configuration profiles from the nf-core/config repository on GitHub. + + Args: + commit: The config version to pull + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + Sets self.repo + """ + try: + if os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.configs = git.Submodule.add( + self.repo, + "nf-core configuration", + "./conf_institutional", + f"git@github.com:nf-core/configs.git", + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Adding configuration"), + ) + except GitCommandError: + raise LookupError(f"Failed to retrieve configuration: `{remote}`") + + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + self.retry_setup_local_repo() finally: self.repo.git.checkout(commit) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index f04ef8e0c7..4bbd4f8443 100644 --- 
a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -22,7 +22,7 @@ class RemoteProgressbar(git.RemoteProgress): """ An object to create a progressbar for when doing an operation with the remote. - Note that an initialized rich Progress (progress bar) object must be past + Note that an initialized rich Progress (progress bar) object must be passed during initialization. """ From 3bc97c553de3126dd930e3d6ef1882f25b70241c Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 24 Feb 2023 18:42:12 +0100 Subject: [PATCH 15/42] Started to implement the multiple revision selection for the Tower download. --- nf_core/download.py | 78 ++++++++++++++++++++++----------------------- nf_core/utils.py | 10 ++++-- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 0d33b601f0..ff06faecdc 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -75,7 +75,7 @@ class DownloadWorkflow: Args: pipeline (str): A nf-core pipeline name. - revision (str): The workflow revision to download, like `1.0`. Defaults to None. + revision (List[str]): The workflow revision to download, like `1.0`. Defaults to None. singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. 
@@ -94,20 +94,19 @@ def __init__( parallel_downloads=4, ): self.pipeline = pipeline - self.revision = revision + self.revision = [].extend(revision) if revision else [] self.outdir = outdir self.output_filename = None self.compress_type = compress_type self.force = force self.tower = tower - self.include_config self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads self.wf_revisions = {} self.wf_branches = {} - self.wf_sha = None + self.wf_sha = {} self.wf_download_url = None self.nf_config = {} self.containers = [] @@ -136,7 +135,7 @@ def download_workflow(self): log.critical(e) sys.exit(1) - summary_log = [f"Pipeline revision: '{self.revision}'", f"Pull containers: '{self.container}'"] + summary_log = [f"Pipeline revision: '{','.join(self.revision)}'", f"Pull containers: '{self.container}'"] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") @@ -213,11 +212,10 @@ def download_workflow_tower(self): log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo( - remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha + remote_url=f"git@github.com:{self.pipeline}.git", + revision=self.revision[0] if self.revision else None, + commit=list(self.wf_sha.values())[0] if bool(self.wf_sha) else "", ) - import pdb - - pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -230,39 +228,44 @@ def prompt_pipeline_name(self): def prompt_revision(self): """Prompt for pipeline revision / branch""" # Prompt user for revision tag if '--revision' was not set - if self.revision is None: - self.revision = nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches) + # If --tower is specified, allow to select multiple revisions + + if 
not bool(self.revision): + self.revision.extend( + nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches, multiple=self.tower) + ) def get_revision_hash(self): """Find specified revision / branch hash""" - # Branch - if self.revision in self.wf_branches.keys(): - self.wf_sha = self.wf_branches[self.revision] - - # Revision - else: - for r in self.wf_revisions: - if r["tag_name"] == self.revision: - self.wf_sha = r["tag_sha"] - break + for revision in self.revision: # revision is a list of strings, but may be of length 1 + # Branch + if revision in self.wf_branches.keys(): + self.wf_sha[revision].append(self.wf_branches[revision]) - # Can't find the revisions or branch - throw an error + # Revision else: - log.info( - "Available {} revisions: '{}'".format( - self.pipeline, "', '".join([r["tag_name"] for r in self.wf_revisions]) + for r in self.wf_revisions: + if r["tag_name"] == revision: + self.wf_sha[revision].append(r["tag_sha"]) + break + + # Can't find the revisions or branch - throw an error + else: + log.info( + "Available {} revisions: '{}'".format( + self.pipeline, "', '".join([r["tag_name"] for r in self.wf_revisions]) + ) ) - ) - log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) - raise AssertionError(f"Not able to find revision / branch '{self.revision}' for {self.pipeline}") + log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) + raise AssertionError(f"Not able to find revision / branch '{revision}' for {self.pipeline}") # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision}" + self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision[0] if self.revision else ''}" # Set the download URL and return - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{self.wf_sha}.zip" + self.wf_download_url = 
f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}.zip" def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -378,12 +381,6 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None - def prompt_config_inclusion(self): - """Prompt for inclusion of institutional configurations""" - self.include_configs = questionary.confirm( - "Include the institutional configuration files into the download?" - ).ask() - def download_wf_files(self): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" log.debug(f"Downloading {self.wf_download_url}") @@ -394,7 +391,7 @@ def download_wf_files(self): zipfile.extractall(self.outdir) # Rename the internal directory name to be more friendly - gh_name = f"{self.pipeline}-{self.wf_sha}".split("/")[-1] + gh_name = f"{self.pipeline}-{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}".split("/")[-1] os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, "workflow")) # Make downloaded files executable @@ -795,7 +792,7 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): if lines: # something went wrong with the container retrieval if any("FATAL: " in line for line in lines): - log.info("Singularity container retrieval fialed with the following error:") + log.info("Singularity container retrieval failed with the following error:") log.info("".join(lines)) raise FileNotFoundError(f'The container "{container}" is unavailable.\n{"".join(lines)}') @@ -855,14 +852,15 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. + self.revision (list): The revision to use. A list of strings. commit (str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. 
hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. """ self.remote_url = remote_url - self.revision = revision - self.commit = commit + self.revision = [].extend(revision) if revision else [] + self.commit = [].extend(commit) if commit else [] self.hide_progress = hide_progress self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) diff --git a/nf_core/utils.py b/nf_core/utils.py index 3fa72d8b53..cef1cf3272 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -823,12 +823,13 @@ def prompt_remote_pipeline_name(wfs): raise AssertionError(f"Not able to find pipeline '{pipeline}'") -def prompt_pipeline_release_branch(wf_releases, wf_branches): +def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): """Prompt for pipeline release / branch Args: wf_releases (array): Array of repo releases as returned by the GitHub API wf_branches (array): Array of repo branches, as returned by the GitHub API + multiple (bool): Allow selection of multiple releases & branches (for Tower) Returns: choice (str): Selected release / branch name @@ -850,7 +851,12 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches): if len(choices) == 0: return False - return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() + if multiple: + return questionary.checkbox( + "Select release / branch:", choices=choices, style=nfcore_question_style + ).unsafe_ask() + else: + return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() def get_repo_releases_branches(pipeline, wfs): From e17a8e9dba648575824c6714353f149aa213df4b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 Feb 2023 15:19:53 +0100 Subject: [PATCH 16/42] Rewrite get_revision_hash() function to accomodate multiple revisions. 
--- nf_core/download.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index ff06faecdc..87a618cee0 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -241,13 +241,13 @@ def get_revision_hash(self): for revision in self.revision: # revision is a list of strings, but may be of length 1 # Branch if revision in self.wf_branches.keys(): - self.wf_sha[revision].append(self.wf_branches[revision]) + self.wf_sha = {**self.wf_sha, revision: self.wf_branches[revision]} # Revision else: for r in self.wf_revisions: if r["tag_name"] == revision: - self.wf_sha[revision].append(r["tag_sha"]) + self.wf_sha = {**self.wf_sha, revision: r["tag_sha"]} break # Can't find the revisions or branch - throw an error @@ -262,10 +262,13 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision[0] if self.revision else ''}" + self.outdir = ( + f"{self.pipeline.replace('/', '-').lower()}-{'_'.join(self.revision) if self.revision else ''}" + ) - # Set the download URL and return - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}.zip" + if not self.tower and bool(self.wf_sha): + # Set the download URL and return - only applicable for classic downloads + self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" def prompt_container_download(self): """Prompt whether to download container images or not""" From ecaabf8bd5e07a1e289f51086fc3ea6778f70f06 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 Feb 2023 15:40:02 +0100 Subject: [PATCH 17/42] The 2nd revivial of the config choice. Now available for archives with only one revision. 
--- nf_core/download.py | 55 ++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 87a618cee0..41b1fe1458 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -100,6 +100,7 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower + self.include_configs = True self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads @@ -126,6 +127,9 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() + # inclusion of configs is unsuitable for multi-revision repositories. + if len(self.revision) == 1: + self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() @@ -149,6 +153,10 @@ def download_workflow(self): else: summary_log.append(f"Output directory: '{self.outdir}'") + if len(self.revision) == 1: + # Only show entry, if option was prompted. 
+ summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") # Check that the outdir doesn't already exist @@ -183,14 +191,15 @@ def download_workflow_classic(self): self.download_wf_files() # Download the centralised configs - log.info("Downloading centralised configs from GitHub") - self.download_configs() - try: - self.wf_use_local_configs() - except FileNotFoundError as e: - log.error("Error editing pipeline config file to use local configs!") - log.critical(e) - sys.exit(1) + if self.include_configs: + log.info("Downloading centralised configs from GitHub") + self.download_configs() + try: + self.wf_use_local_configs() + except FileNotFoundError as e: + log.error("Error editing pipeline config file to use local configs!") + log.critical(e) + sys.exit(1) # Download the singularity images if self.container == "singularity": @@ -213,10 +222,12 @@ def download_workflow_tower(self): self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", - revision=self.revision[0] if self.revision else None, - commit=list(self.wf_sha.values())[0] if bool(self.wf_sha) else "", + revision=self.revision if self.revision else None, + commit=self.wf_sha.values if bool(self.wf_sha) else None, ) - log.info("Downloading centralised configs from GitHub") + + if self.include_configs: + log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -231,10 +242,13 @@ def prompt_revision(self): # If --tower is specified, allow to select multiple revisions if not bool(self.revision): - self.revision.extend( - nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches, multiple=self.tower) + temp = nf_core.utils.prompt_pipeline_release_branch( + self.wf_revisions, self.wf_branches, multiple=self.tower ) + # have to make sure that self.revision 
is a list of strings, regardless if temp is str or list of strings. + self.revision.append(temp) if isinstance(temp, str) else self.revision.extend(temp) + def get_revision_hash(self): """Find specified revision / branch hash""" @@ -270,6 +284,13 @@ def get_revision_hash(self): # Set the download URL and return - only applicable for classic downloads self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" + def prompt_config_inclusion(self): + """Prompt for inclusion of institutional configurations""" + self.include_configs = questionary.confirm( + "Include the nf-core's default institutional configuration files into the download?", + style=nf_core.utils.nfcore_question_style, + ).ask() + def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -855,12 +876,16 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. - self.revision (list): The revision to use. A list of strings. - commit (str): The commit to clone. Defaults to None. + self.revision (list of str): The revision to use. A list of strings. + commit (dict of str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. """ + import pdb + + pdb.set_trace() + self.remote_url = remote_url self.revision = [].extend(revision) if revision else [] self.commit = [].extend(commit) if commit else [] From 6d04ec8287e2bb4f47aa373beeadb149cb75e0bb Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sat, 4 Mar 2023 15:23:43 +0100 Subject: [PATCH 18/42] Inclusion of the revision in the output file name is problematic with the new ability to download multiple revisions at once. 
This resulted in loooooooong filenames. --- nf_core/download.py | 37 ++++++++++++++++++++++++++++--------- nf_core/utils.py | 18 +++++++++++++----- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 41b1fe1458..f6646d29ab 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,6 +3,7 @@ from __future__ import print_function import concurrent.futures +from datetime import datetime import git from git.exc import GitCommandError, InvalidGitRepositoryError import io @@ -139,7 +140,10 @@ def download_workflow(self): log.critical(e) sys.exit(1) - summary_log = [f"Pipeline revision: '{','.join(self.revision)}'", f"Pull containers: '{self.container}'"] + summary_log = [ + f"Pipeline revision: '{','.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pull containers: '{self.container}'", + ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") @@ -242,12 +246,29 @@ def prompt_revision(self): # If --tower is specified, allow to select multiple revisions if not bool(self.revision): - temp = nf_core.utils.prompt_pipeline_release_branch( + (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( self.wf_revisions, self.wf_branches, multiple=self.tower ) - # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. - self.revision.append(temp) if isinstance(temp, str) else self.revision.extend(temp) + # The checkbox() prompt unfortunately does not support passing a Validator, + # so a user who keeps pressing Enter will bump through the selection + + # bool(choice), bool(tag_set): + ############################# + # True, True: A choice was made and revisions were available. 
+ # False, True: No selection was made, but revisions were available -> defaults to all available. + # False, False: No selection was made because no revisions were available -> raise AssertionError. + # True, False: Congratulations, you found a bug! That combo shouldn't happen. + + if bool(choice): + # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. + self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice) + else: + if bool(tag_set): + self.revision = tag_set + log.info("No particular revision was selected, all available will be downloaded.") + else: + raise AssertionError(f"No revisions of {self.pipeline} available for download.") def get_revision_hash(self): """Find specified revision / branch hash""" @@ -276,9 +297,7 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = ( - f"{self.pipeline.replace('/', '-').lower()}-{'_'.join(self.revision) if self.revision else ''}" - ) + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" if not self.tower and bool(self.wf_sha): # Set the download URL and return - only applicable for classic downloads @@ -876,8 +895,8 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. - self.revision (list of str): The revision to use. A list of strings. - commit (dict of str): The commit to clone. Defaults to None. + self.revision (list of str): The revisions to include. A list of strings. + commits (dict of str): The checksums to linked with the revisions. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
diff --git a/nf_core/utils.py b/nf_core/utils.py index cef1cf3272..f73d7c9dc4 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -834,29 +834,37 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): Returns: choice (str): Selected release / branch name """ - # Prompt user for release tag + # Prompt user for release tag, tag_set will contain all available. choices = [] + tag_set = [] # Releases if len(wf_releases) > 0: for tag in map(lambda release: release.get("tag_name"), wf_releases): tag_display = [("fg:ansiblue", f"{tag} "), ("class:choice-default", "[release]")] choices.append(questionary.Choice(title=tag_display, value=tag)) + tag_set.append(tag) # Branches for branch in wf_branches.keys(): branch_display = [("fg:ansiyellow", f"{branch} "), ("class:choice-default", "[branch]")] choices.append(questionary.Choice(title=branch_display, value=branch)) + tag_set.append(branch) if len(choices) == 0: return False if multiple: - return questionary.checkbox( - "Select release / branch:", choices=choices, style=nfcore_question_style - ).unsafe_ask() + return ( + questionary.checkbox("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask(), + tag_set, + ) + else: - return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() + return ( + questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask(), + tag_set, + ) def get_repo_releases_branches(pipeline, wfs): From 7642e4fb86384540175febd25b4919e0a21dde0e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 8 Mar 2023 18:33:17 +0100 Subject: [PATCH 19/42] Allow multiple instances of the -r argument. Needed for scripted download. Ultimately, this now means that I also have to implement multiple version downloads for the classic download. Just downloading the first doesn't seem to make sense from a UX perspective. 
--- nf_core/__main__.py | 7 ++++++- nf_core/download.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 521454eb99..056242aac2 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -209,7 +209,12 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all # nf-core download @nf_core_cli.command() @click.argument("pipeline", required=False, metavar="") -@click.option("-r", "--revision", type=str, help="Pipeline release") +@click.option( + "-r", + "--revision", + multiple=True, + help="Pipeline release to download. Multiple invocations are possible.", +) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" diff --git a/nf_core/download.py b/nf_core/download.py index f6646d29ab..61d4f2c1c8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -95,7 +95,12 @@ def __init__( parallel_downloads=4, ): self.pipeline = pipeline - self.revision = [].extend(revision) if revision else [] + if isinstance(revision, str): + self.revision = [revision] + elif isinstance(revision, tuple): + self.revision = [*revision] + else: + self.revision = [] self.outdir = outdir self.output_filename = None self.compress_type = compress_type From 7f93edbc81bb267c2a0b9f5b85feb7c8f98abbea Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 Mar 2023 16:02:23 +0200 Subject: [PATCH 20/42] Finished updating the prompts for the dialogues. --- nf_core/download.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 61d4f2c1c8..ad8da971d1 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -133,8 +133,8 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() - # inclusion of configs is unsuitable for multi-revision repositories. 
- if len(self.revision) == 1: + # inclusion of configs is unnecessary for Tower. + if not self.tower: self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() @@ -146,7 +146,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{','.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -187,12 +187,6 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) - # Actually download the workflow - if not self.tower: - self.download_workflow_classic() - else: - self.download_workflow_tower() - def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the pipeline files @@ -256,7 +250,7 @@ def prompt_revision(self): ) # The checkbox() prompt unfortunately does not support passing a Validator, - # so a user who keeps pressing Enter will bump through the selection + # so a user who keeps pressing Enter will bump through the selection without choice. # bool(choice), bool(tag_set): ############################# @@ -266,7 +260,7 @@ def prompt_revision(self): # True, False: Congratulations, you found a bug! That combo shouldn't happen. if bool(choice): - # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. + # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. 
self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice) else: if bool(tag_set): @@ -302,7 +296,10 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" + if len(self.wf_sha) > 1: + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" + else: + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}" if not self.tower and bool(self.wf_sha): # Set the download URL and return - only applicable for classic downloads From 12bf9428868e1e4af2fd408ba4a2c8d05643988f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Apr 2023 17:37:20 +0200 Subject: [PATCH 21/42] Converted the self.wf_download_url into a dict. --- nf_core/download.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index ad8da971d1..8e2ead0f86 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -114,7 +114,7 @@ def __init__( self.wf_revisions = {} self.wf_branches = {} self.wf_sha = {} - self.wf_download_url = None + self.wf_download_url = {} self.nf_config = {} self.containers = [] @@ -162,7 +162,7 @@ def download_workflow(self): else: summary_log.append(f"Output directory: '{self.outdir}'") - if len(self.revision) == 1: + if not self.tower: # Only show entry, if option was prompted. 
summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") @@ -187,8 +187,20 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) + # Perform the actual download + if self.tower: + # self.download_workflow_tower() + pass + else: + self.download_workflow_classic() + def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" + + import pdb + + pdb.set_trace() + # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -301,9 +313,13 @@ def get_revision_hash(self): else: self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}" - if not self.tower and bool(self.wf_sha): - # Set the download URL and return - only applicable for classic downloads - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" + if not self.tower: + for revision, wf_sha in self.wf_sha.items(): + # Set the download URL and return - only applicable for classic downloads + self.wf_download_url = { + **self.wf_download_url, + revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip", + } def prompt_config_inclusion(self): """Prompt for inclusion of institutional configurations""" @@ -903,9 +919,6 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. """ - import pdb - - pdb.set_trace() self.remote_url = remote_url self.revision = [].extend(revision) if revision else [] From 2ff62f31b4ec9aa7998f04bd13ebf1e25b0855b5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Apr 2023 17:08:09 +0200 Subject: [PATCH 22/42] Enable multi-revision classic download. 
--- nf_core/download.py | 135 ++++++++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 42 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8e2ead0f86..3327f7580d 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -197,28 +197,32 @@ def download_workflow(self): def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" - import pdb - - pdb.set_trace() - - # Download the pipeline files - log.info("Downloading workflow files from GitHub") - self.download_wf_files() - - # Download the centralised configs + # Download the centralised configs first if self.include_configs: log.info("Downloading centralised configs from GitHub") self.download_configs() - try: - self.wf_use_local_configs() - except FileNotFoundError as e: - log.error("Error editing pipeline config file to use local configs!") - log.critical(e) - sys.exit(1) + + # Download the pipeline files for each selected revision + log.info("Downloading workflow files from GitHub") + + for item in zip(self.revision, self.wf_sha.values(), self.wf_download_url.values()): + revision_dirname = self.download_wf_files(revision=item[0], wf_sha=item[1], download_url=item[2]) + + if self.include_configs: + try: + self.wf_use_local_configs(revision_dirname) + except FileNotFoundError as e: + log.error("Error editing pipeline config file to use local configs!") + log.critical(e) + sys.exit(1) + + # Collect all required singularity images + if self.container == "singularity": + self.find_container_images(revision_dirname) # Download the singularity images if self.container == "singularity": - self.find_container_images() + log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -442,24 +446,29 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None - def 
download_wf_files(self): + def download_wf_files(self, revision, wf_sha, download_url): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" - log.debug(f"Downloading {self.wf_download_url}") + log.debug(f"Downloading {download_url}") # Download GitHub zip file into memory and extract - url = requests.get(self.wf_download_url) + url = requests.get(download_url) with ZipFile(io.BytesIO(url.content)) as zipfile: zipfile.extractall(self.outdir) + # create a filesystem-safe version of the revision name for the directory + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) + # Rename the internal directory name to be more friendly - gh_name = f"{self.pipeline}-{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}".split("/")[-1] - os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, "workflow")) + gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] + os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, revision_dirname)) # Make downloaded files executable - for dirpath, _, filelist in os.walk(os.path.join(self.outdir, "workflow")): + for dirpath, _, filelist in os.walk(os.path.join(self.outdir, revision_dirname)): for fname in filelist: os.chmod(os.path.join(dirpath, fname), 0o775) + return revision_dirname + def download_configs(self): """Downloads the centralised config profiles from nf-core/configs to :attr:`self.outdir`.""" configs_zip_url = "https://github.com/nf-core/configs/archive/master.zip" @@ -479,9 +488,9 @@ def download_configs(self): for fname in filelist: os.chmod(os.path.join(dirpath, fname), 0o775) - def wf_use_local_configs(self): + def wf_use_local_configs(self, revision_dirname): """Edit the downloaded nextflow.config file to use the local config files""" - nfconfig_fn = os.path.join(self.outdir, "workflow", "nextflow.config") + nfconfig_fn = os.path.join(self.outdir, revision_dirname, "nextflow.config") find_str = 
"https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" repl_str = "${projectDir}/../configs/" log.debug(f"Editing 'params.custom_config_base' in '{nfconfig_fn}'") @@ -507,7 +516,7 @@ def wf_use_local_configs(self): with open(nfconfig_fn, "w") as nfconfig_fh: nfconfig_fh.write(nfconfig) - def find_container_images(self): + def find_container_images(self, revision_dirname): """Find container image names for workflow. Starts by using `nextflow config` to pull out any process.container @@ -533,15 +542,23 @@ def find_container_images(self): 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : 'biocontainers/fastqc:0.11.9--0' }" + Later DSL2, variable is being used: + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + "https://depot.galaxyproject.org/singularity/${container_id}" : + "quay.io/biocontainers/${container_id}" }" + + container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' + DSL1 / Special case DSL2: container "nfcore/cellranger:6.0.2" """ log.debug("Fetching container names for workflow") - containers_raw = [] + # since this is run for multiple versions now, account for previous invocations + containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config - self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, "workflow")) + self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, revision_dirname)) # Find any config variables that look like a container for k, v in self.nf_config.items(): @@ -549,7 +566,7 @@ def find_container_images(self): containers_raw.append(v.strip('"').strip("'")) # Recursive search through any DSL2 module files for container spec lines. 
- for subdir, _, files in os.walk(os.path.join(self.outdir, "workflow", "modules")): + for subdir, _, files in os.walk(os.path.join(self.outdir, revision_dirname, "modules")): for file in files: if file.endswith(".nf"): file_path = os.path.join(subdir, file) @@ -569,18 +586,54 @@ def find_container_images(self): break # Prioritise http, exit loop as soon as we find it # No https download, is the entire container string a docker URI? - else: - # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980 - docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(? 1 else ''}") - def get_singularity_images(self): """Loop through container names and download Singularity images""" From 986f791a438315eec0ae49190d3abda381a7e8ef Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 17 Apr 2023 15:47:00 +0200 Subject: [PATCH 23/42] Small tweaks to ensure that tools doesn't bail out if there is no symlink from singularity to apptainer on the system. --- nf_core/download.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 3327f7580d..a09e457e36 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -689,8 +689,11 @@ def get_singularity_images(self): containers_pull.append([container, out_path, cache_path]) # Exit if we need to pull images and Singularity is not installed - if len(containers_pull) > 0 and shutil.which("singularity") is None: - raise OSError("Singularity is needed to pull images, but it is not installed") + if len(containers_pull) > 0: + if not shutil.which("singularity") or not shutil.which("apptainer"): + raise OSError( + "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" + ) # Go through each method of fetching containers in order for container in containers_exist: @@ -881,7 +884,12 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): # Pull using singularity address = 
f"docker://{container.replace('docker://', '')}" - singularity_command = ["singularity", "pull", "--name", output_path, address] + if shutil.which("singularity"): + singularity_command = ["singularity", "pull", "--name", output_path, address] + elif shutil.which("apptainer"): + singularity_command = ["apptainer", "pull", "--name", output_path, address] + else: + raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") log.debug(f"Building singularity image: {address}") log.debug(f"Singularity command: {' '.join(singularity_command)}") From 6f95829a2269be9d43c488a84c4a9c9d06344a02 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 18 Apr 2023 18:33:49 +0200 Subject: [PATCH 24/42] Initialise the Git repo clone of the workflow. --- nf_core/download.py | 80 +++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 50 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index a09e457e36..8c66517972 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -189,8 +189,7 @@ def download_workflow(self): # Perform the actual download if self.tower: - # self.download_workflow_tower() - pass + self.download_workflow_tower() else: self.download_workflow_classic() @@ -242,9 +241,14 @@ def download_workflow_tower(self): self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision if self.revision else None, - commit=self.wf_sha.values if bool(self.wf_sha) else None, + commit=self.wf_sha.values() if bool(self.wf_sha) else None, + in_cache=False, ) + import pdb + + pdb.set_trace() + if self.include_configs: log.info("Downloading centralised configs from GitHub") @@ -457,6 +461,9 @@ def download_wf_files(self, revision, wf_sha, download_url): # create a filesystem-safe version of the revision name for the directory revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) + # account for name collisions, if there is a branch / release 
named "configs" or "singularity-images" + if revision_dirname in ["configs", "singularity-images"]: + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", self.pipeline + revision_dirname) # Rename the internal directory name to be more friendly gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] @@ -980,16 +987,25 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa """ self.remote_url = remote_url - self.revision = [].extend(revision) if revision else [] - self.commit = [].extend(commit) if commit else [] - self.hide_progress = hide_progress + if isinstance(revision, str): + self.revision = [revision] + elif isinstance(revision, list): + self.revision = [*revision] + else: + self.revision = [] + if isinstance(commit, str): + self.commit = [commit] + elif isinstance(revision, list): + self.commit = [*commit] + else: + self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, commit, hide_progress, in_cache=in_cache) + self.setup_local_repo(remote_url, commit=None, in_cache=in_cache) def __repr__(self): """Called by print, creates representation of object""" - return f"" + return f"" def retry_setup_local_repo(self): if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): @@ -999,7 +1015,7 @@ def retry_setup_local_repo(self): else: raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): + def setup_local_repo(self, remote, commit=None, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -1007,7 +1023,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): Args: remote (str): git url of remote - branch (str): name of branch to use + commit (str): name of branch to checkout from (optional) hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. Sets self.repo @@ -1022,7 +1038,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, ) with pbar: self.repo = git.Repo.clone_from( @@ -1045,7 +1061,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, ) with pbar: self.repo.remotes.origin.fetch( @@ -1057,41 +1073,5 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() finally: - self.repo.git.checkout(commit) - - def add_nfcore_configs(self, commit, hide_progress=False): - """ - Pulls the configuration profiles from the nf-core/config repository on GitHub. - - Args: - commit: The config version to pull - hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. 
- Sets self.repo - """ - - try: - if os.path.exists(self.local_repo_dir): - try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.configs = git.Submodule.add( - self.repo, - "nf-core configuration", - "./conf_institutional", - f"git@github.com:nf-core/configs.git", - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Adding configuration"), - ) - except GitCommandError: - raise LookupError(f"Failed to retrieve configuration: `{remote}`") - - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - self.retry_setup_local_repo() - finally: - self.repo.git.checkout(commit) + if commit: + self.repo.git.checkout(commit) From 760fcaad48032aea7232e99bfdcb205d694f66e6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 19 Apr 2023 18:25:01 +0200 Subject: [PATCH 25/42] WorkflowRepo attributes and functions. 
--- nf_core/download.py | 68 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8c66517972..43a27e2f51 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -245,12 +245,29 @@ def download_workflow_tower(self): in_cache=False, ) - import pdb + # Remove tags for those revisions that had not been selected + self.workflow_repo.tidy_tags() - pdb.set_trace() + # extract the required containers + if self.container == "singularity": + for commit in self.wf_sha.values(): + # Checkout the repo in the current revision + self.workflow_repo.checkout(commit) + # Collect all required singularity images + self.find_container_images(self.workflow_repo.access()) + + # Download the singularity images + log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") + try: + self.get_singularity_images() + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) - if self.include_configs: - log.info("Downloading centralised configs from GitHub") + # Compress into an archive + if self.compress_type is not None: + log.info("Compressing images") + self.compress_download() def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -973,7 +990,15 @@ class WorkflowRepo(SyncedRepo): """ - def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=False, in_cache=True): + def __init__( + self, + remote_url, + revision, + commit, + no_pull=False, + hide_progress=False, + in_cache=True, + ): """ Initializes the object and clones the workflows git repository if it is not already present @@ -985,7 +1010,6 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
""" - self.remote_url = remote_url if isinstance(revision, str): self.revision = [revision] @@ -1001,11 +1025,23 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, commit=None, in_cache=in_cache) + self.setup_local_repo(remote_url, in_cache=in_cache) + + # expose some instance attributes + self.tags = self.repo.tags def __repr__(self): """Called by print, creates representation of object""" - return f"" + return f"" + + def access(self): + if os.path.exists(self.local_repo_dir): + return self.local_repo_dir + else: + return None + + def checkout(self, commit): + return super().checkout(commit) def retry_setup_local_repo(self): if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): @@ -1015,7 +1051,7 @@ def retry_setup_local_repo(self): else: raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, commit=None, in_cache=True): + def setup_local_repo(self, remote, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -1072,6 +1108,14 @@ def setup_local_repo(self, remote, commit=None, in_cache=True): except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() - finally: - if commit: - self.repo.git.checkout(commit) + + def tidy_tags(self): + """ + Function to delete all tags that point to revisions that are not of interest to the downloader. + This allows a clutter-free experience in Tower. The commits are evidently still available. 
+ """ + if self.revision and self.repo and self.repo.tags: + for tag in self.repo.tags: + if tag.name not in self.revision: + self.repo.delete_tag(tag) + self.tags = self.repo.tags From c381776fe37b4824789ba9e3d6fc31a1326ec8c6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 21 Apr 2023 16:06:57 +0200 Subject: [PATCH 26/42] Finished the Tower download branch. --- nf_core/download.py | 76 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 43a27e2f51..e8fafa5b1a 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -133,12 +133,13 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() - # inclusion of configs is unnecessary for Tower. + # Inclusion of configs is unnecessary for Tower. if not self.tower: self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() + # Nothing meaningful to compress here. 
if not self.tower: self.prompt_compression_type() except AssertionError as e: @@ -230,7 +231,7 @@ def download_workflow_classic(self): # Compress into an archive if self.compress_type is not None: - log.info("Compressing download..") + log.info("Compressing output into archive") self.compress_download() def download_workflow_tower(self): @@ -248,6 +249,9 @@ def download_workflow_tower(self): # Remove tags for those revisions that had not been selected self.workflow_repo.tidy_tags() + # create a bare clone of the modified repository needed for Tower + self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) + # extract the required containers if self.container == "singularity": for commit in self.wf_sha.values(): @@ -264,10 +268,9 @@ def download_workflow_tower(self): log.critical(f"[red]{e}[/]") sys.exit(1) - # Compress into an archive - if self.compress_type is not None: - log.info("Compressing images") - self.compress_download() + # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) + if self.compress_type is not None: + log.info("Compression choice is ignored for Tower downloads since nothing can be reasonably compressed.") def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -1019,11 +1022,12 @@ def __init__( self.revision = [] if isinstance(commit, str): self.commit = [commit] - elif isinstance(revision, list): + elif isinstance(commit, list): self.commit = [*commit] else: self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + self.retries = 0 # retries for setting up the locally cached repository self.setup_local_repo(remote_url, in_cache=in_cache) @@ -1043,13 +1047,24 @@ def access(self): def checkout(self, commit): return super().checkout(commit) - def retry_setup_local_repo(self): - if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): 
- log.info(f"Removing '{self.local_repo_dir}'") + def retry_setup_local_repo(self, skip_confirm=False): + self.retries += 1 + if skip_confirm or rich.prompt.Confirm.ask( + f"[violet]Delete local cache '{self.local_repo_dir}' and try again?" + ): + if ( + self.retries > 1 + ): # One unconfirmed retry is acceptable, but prevent infinite loops without user interaction. + log.error( + f"Errors with locally cached repository of '{self.fullname}'. Please delete '{self.local_repo_dir}' manually and try again." + ) + sys.exit(1) + if not skip_confirm: # Feedback to user for manual confirmation. + log.info(f"Removing '{self.local_repo_dir}'") shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(self.remote, self.commit, self.hide_progress) + self.setup_local_repo(self.remote_url, in_cache=False) else: - raise LookupError("Exiting due to error with local modules git repo") + raise LookupError("Exiting due to error with locally cached Git repository.") def setup_local_repo(self, remote, in_cache=True): """ @@ -1113,9 +1128,38 @@ def tidy_tags(self): """ Function to delete all tags that point to revisions that are not of interest to the downloader. This allows a clutter-free experience in Tower. The commits are evidently still available. + + However, due to local caching, the downloader might also want access to revisions that had been deleted before. + In that case, don't bother with re-adding the tags and rather download anew from Github. """ if self.revision and self.repo and self.repo.tags: - for tag in self.repo.tags: - if tag.name not in self.revision: - self.repo.delete_tag(tag) - self.tags = self.repo.tags + desired_tags = self.revision.copy() + try: + for tag in self.repo.tags: + if tag.name not in self.revision: + self.repo.delete_tag(tag) + else: + desired_tags.remove(tag.name) + self.tags = self.repo.tags + if len(desired_tags) > 0: + log.info( + f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_tags)}. 
Downloading anew from GitHub..." + ) + self.retry_setup_local_repo(skip_confirm=True) + self.tidy_tags() + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") + self.retry_setup_local_repo(skip_confirm=True) + sys.exit(1) + + def bare_clone(self, destination): + if self.repo: + try: + destfolder = os.path.abspath(destination) + if not os.path.exists(destfolder): + os.makedirs(destfolder) + if os.path.exists(destination): + shutil.rmtree(os.path.abspath(destination)) + self.repo.clone(os.path.abspath(destination), bare=True) + except (OSError, GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Failure to create the pipeline download[/]\n{e}\n") From 526a26e45ddc00fb4ef7969c5df822def07cb0c4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 14:55:24 +0200 Subject: [PATCH 27/42] Minor tweaks to the container download functionality. --- nf_core/download.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e8fafa5b1a..cba7eb7dfb 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -235,7 +235,7 @@ def download_workflow_classic(self): self.compress_download() def download_workflow_tower(self): - """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" + """Create a bare-cloned git repository of the workflow, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") @@ -261,12 +261,12 @@ def download_workflow_tower(self): self.find_container_images(self.workflow_repo.access()) # Download the singularity images - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + 
log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") + try: + self.get_singularity_images() + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) if self.compress_type is not None: @@ -581,7 +581,7 @@ def find_container_images(self, revision_dirname): """ log.debug("Fetching container names for workflow") - # since this is run for multiple versions now, account for previous invocations + # since this is run for multiple revisions now, account for previously detected containers. containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config From f4b9e673b3b5b3a14175845d4b6d2d71ace759a2 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 16:19:27 +0200 Subject: [PATCH 28/42] Updating docs and changelog, fixing linting errors. --- CHANGELOG.md | 4 ++++ README.md | 7 +++++++ nf_core/download.py | 8 ++++---- nf_core/modules/modules_repo.py | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b79fd410dc..d880e0105d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,10 @@ - Removed `quay.io` from all module Docker container references as this is now supplied at pipeline level. ([#2249](https://github.com/nf-core/tools/pull/2249)) - Remove `CITATION.cff` file from pipeline template, to avoid that pipeline Zenodo entries reference the nf-core publication instead of the pipeline ([#2059](https://github.com/nf-core/tools/pull/2059)). +### Download + +- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). 
+ ### Linting - Update modules lint test to fail if enable_conda is found ([#2213](https://github.com/nf-core/tools/pull/2213)) diff --git a/README.md b/README.md index 0de42e86e8..13d8b381a3 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ A python package with helper tools for the nf-core community. - [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) - [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) +- [`nf-core download --tower` - Download pipeline for Tower](#downloading-pipelines-for-tower) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) @@ -401,6 +402,12 @@ Note that compressing many GBs of binary files can be slow, so specifying `--com If the download speeds are much slower than your internet connection is capable of, you can set `--parallel-downloads` to a large number to download loads of images at once. +### Adapting downloads to Nextflow Tower + +[seqeralabs® Nextflow Tower](https://cloud.tower.nf/) provides a graphical user interface to oversee pipeline runs, gather statistics and configure compute resources. While pipelines added to _Tower_ are preferably hosted at a Git service, providing them as disconnected, self-reliant repositories is also possible for premises with restricted network access. Choosing the `--tower` flag will download the pipeline in an appropriate form. + +Subsequently, the `*.git` folder can be moved to it's final destination and linked with a pipeline in _Tower_ using the `file:/` prefix. + ## Pipeline software licences Sometimes it's useful to see the software licences of the tools used in a pipeline. 
diff --git a/nf_core/download.py b/nf_core/download.py index cba7eb7dfb..9fe9c29c9f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,9 +3,6 @@ from __future__ import print_function import concurrent.futures -from datetime import datetime -import git -from git.exc import GitCommandError, InvalidGitRepositoryError import io import logging import os @@ -15,19 +12,22 @@ import sys import tarfile import textwrap +from datetime import datetime from zipfile import ZipFile +import git import questionary import requests import requests_cache import rich import rich.progress +from git.exc import GitCommandError, InvalidGitRepositoryError import nf_core import nf_core.list import nf_core.utils -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR from nf_core.synced_repo import RemoteProgressbar, SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR log = logging.getLogger(__name__) stderr = rich.console.Console( diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 20d581af84..152ed7b0c0 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -11,8 +11,8 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config from nf_core.synced_repo import RemoteProgressbar, SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config log = logging.getLogger(__name__) From 2bf14bd6377fa235bf2066acdaa17b10947d2885 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 21:44:04 +0200 Subject: [PATCH 29/42] Hopefully fixed the existing tests. New ones still need to be written. 
--- nf_core/download.py | 2 +- tests/test_cli.py | 4 +++- tests/test_download.py | 38 +++++++++++++++++++++----------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 9fe9c29c9f..53bb744184 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -717,7 +717,7 @@ def get_singularity_images(self): # Exit if we need to pull images and Singularity is not installed if len(containers_pull) > 0: - if not shutil.which("singularity") or not shutil.which("apptainer"): + if not (shutil.which("singularity") or shutil.which("apptainer")): raise OSError( "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0a6b37144d..6f51fe1025 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -165,6 +165,7 @@ def test_cli_download(self, mock_dl): "outdir": "/path/outdir", "compress": "tar.gz", "force": None, + "tower": None, "container": "singularity", "singularity-cache-only": None, "parallel-downloads": 2, @@ -177,10 +178,11 @@ def test_cli_download(self, mock_dl): mock_dl.assert_called_once_with( cmd[-1], - params["revision"], + (params["revision"],), params["outdir"], params["compress"], "force" in params, + "tower" in params, params["container"], "singularity-cache-only" in params, params["parallel-downloads"], diff --git a/tests/test_download.py b/tests/test_download.py index e2ae882394..d1a770a630 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -32,10 +32,10 @@ def test_get_release_hash_release(self): download_obj.wf_branches, ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() - assert download_obj.wf_sha == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" - assert download_obj.outdir == "nf-core-methylseq-1.6" + assert download_obj.wf_sha[download_obj.revision[0]] == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" + assert download_obj.outdir == 
"nf-core-methylseq_1.6" assert ( - download_obj.wf_download_url + download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" ) @@ -51,10 +51,10 @@ def test_get_release_hash_branch(self): download_obj.wf_branches, ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() - assert download_obj.wf_sha == "819cbac792b76cf66c840b567ed0ee9a2f620db7" - assert download_obj.outdir == "nf-core-exoseq-dev" + assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7" + assert download_obj.outdir == "nf-core-exoseq_dev" assert ( - download_obj.wf_download_url + download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip" ) @@ -78,12 +78,16 @@ def test_get_release_hash_non_existent_release(self): def test_download_wf_files(self, outdir): download_obj = DownloadWorkflow(pipeline="nf-core/methylseq", revision="1.6") download_obj.outdir = outdir - download_obj.wf_sha = "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" - download_obj.wf_download_url = ( - "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" + download_obj.wf_sha = {"1.6": "b3e5e3b95aaf01d98391a62a10a3990c0a4de395"} + download_obj.wf_download_url = { + "1.6": "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" + } + rev = download_obj.download_wf_files( + download_obj.revision[0], + download_obj.wf_sha[download_obj.revision[0]], + download_obj.wf_download_url[download_obj.revision[0]], ) - download_obj.download_wf_files() - assert os.path.exists(os.path.join(outdir, "workflow", "main.nf")) + assert os.path.exists(os.path.join(outdir, rev, "main.nf")) # # Tests for 'download_configs' @@ -118,7 +122,7 @@ def test_wf_use_local_configs(self, tmp_path): download_obj.download_configs() # Test 
the function - download_obj.wf_use_local_configs() + download_obj.wf_use_local_configs("workflow") wf_config = nf_core.utils.fetch_wf_config(os.path.join(test_outdir, "workflow"), cache_config=False) assert wf_config["params.custom_config_base"] == f"'{test_outdir}/workflow/../configs/'" @@ -133,14 +137,14 @@ def test_find_container_images(self, tmp_path, mock_fetch_wf_config): "process.mapping.container": "cutting-edge-container", "process.nocontainer": "not-so-cutting-edge", } - download_obj.find_container_images() + download_obj.find_container_images("workflow") assert len(download_obj.containers) == 1 assert download_obj.containers[0] == "cutting-edge-container" # # Tests for 'singularity_pull_image' # - # If Singularity is installed, but the container can't be accessed because it does not exist or there are aceess + # If Singularity is installed, but the container can't be accessed because it does not exist or there are access # restrictions, a FileNotFoundError is raised due to the unavailability of the image. @pytest.mark.skipif( shutil.which("singularity") is None, @@ -153,16 +157,16 @@ def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_rich_p with pytest.raises(FileNotFoundError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) - # If Singularity is not installed, it raises a FileNotFoundError because the singularity command can't be found. + # If Singularity is not installed, it raises a OSError because the singularity command can't be found. 
@pytest.mark.skipif( shutil.which("singularity") is not None, - reason="Can't test how the code behaves when sungularity is not installed if it is.", + reason="Can't test how the code behaves when singularity is not installed if it is.", ) @with_temporary_folder @mock.patch("rich.progress.Progress.add_task") def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_rich_progress): download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_dir) - with pytest.raises(FileNotFoundError): + with pytest.raises(OSError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) # From 8de588ecf72df8964edceba689505c07da7a353b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 25 Apr 2023 14:18:58 +0200 Subject: [PATCH 30/42] Refactor the CLI commands for the Singularity Cache Dir --- nf_core/__main__.py | 36 +++++- nf_core/download.py | 287 +++++++++++++++++++++++++++++--------------- 2 files changed, 220 insertions(+), 103 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 056242aac2..46e9ac0988 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -225,11 +225,30 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) @click.option( - "--singularity-cache-only/--singularity-cache-copy", - help="Don't / do copy images to the output directory and set 'singularity.cacheDir' in workflow", + "-s", + "--singularity-cache", + type=click.Choice(["amend", "copy", "remote"]), + help="Utilize the 'singularity.cacheDir' in the download process, if applicable.", +) +@click.option( + "-i", + "--singularity-cache-index", + type=str, + help="List of images already available in a remote 'singularity.cacheDir', imposes --singularity-cache=remote", ) @click.option("-p", "--parallel-downloads", type=int, default=4, help="Number of parallel image downloads") -def 
download(pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads): +def download( + pipeline, + revision, + outdir, + compress, + force, + tower, + container, + singularity_cache, + singularity_cache_index, + parallel_downloads, +): """ Download a pipeline, nf-core/configs and pipeline singularity images. @@ -239,7 +258,16 @@ def download(pipeline, revision, outdir, compress, force, tower, container, sing from nf_core.download import DownloadWorkflow dl = DownloadWorkflow( - pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads + pipeline, + revision, + outdir, + compress, + force, + tower, + container, + singularity_cache, + singularity_cache_index, + parallel_downloads, ) dl.download_workflow() diff --git a/nf_core/download.py b/nf_core/download.py index 53bb744184..59e7f47d3f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -77,7 +77,7 @@ class DownloadWorkflow: Args: pipeline (str): A nf-core pipeline name. revision (List[str]): The workflow revision to download, like `1.0`. Defaults to None. - singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. + container (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. 
""" @@ -91,7 +91,8 @@ def __init__( force=False, tower=False, container=None, - singularity_cache_only=False, + singularity_cache=None, + singularity_cache_index=None, parallel_downloads=4, ): self.pipeline = pipeline @@ -106,9 +107,12 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower - self.include_configs = True + self.include_configs = None self.container = container - self.singularity_cache_only = singularity_cache_only + self.singularity_cache = ( + singularity_cache if not singularity_cache_index else "remote" + ) # if a singularity_cache_index is given, use the file and overrule choice. + self.singularity_cache_index = singularity_cache_index self.parallel_downloads = parallel_downloads self.wf_revisions = {} @@ -117,6 +121,7 @@ def __init__( self.wf_download_url = {} self.nf_config = {} self.containers = [] + self.containers_remote = [] # stores the remote images provided in the file. # Fetch remote workflows self.wfs = nf_core.list.Workflows() @@ -134,11 +139,16 @@ def download_workflow(self): self.prompt_revision() self.get_revision_hash() # Inclusion of configs is unnecessary for Tower. - if not self.tower: + if not self.tower and self.include_configs is None: self.prompt_config_inclusion() - self.prompt_container_download() - self.prompt_use_singularity_cachedir() - self.prompt_singularity_cachedir_only() + if not self.singularity_cache == "remote": + self.prompt_container_download() + self.prompt_singularity_cachedir_creation() + else: + self.container = "singularity" + self.prompt_singularity_cachedir_utilization() + self.prompt_singularity_cachedir_remote(retry=False) + self.read_remote_containers() # Nothing meaningful to compress here. 
if not self.tower: self.prompt_compression_type() @@ -220,9 +230,6 @@ def download_workflow_classic(self): if self.container == "singularity": self.find_container_images(revision_dirname) - # Download the singularity images - if self.container == "singularity": - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -260,8 +267,6 @@ def download_workflow_tower(self): # Collect all required singularity images self.find_container_images(self.workflow_repo.access()) - # Download the singularity images - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -280,24 +285,27 @@ def prompt_pipeline_name(self): self.pipeline = nf_core.utils.prompt_remote_pipeline_name(self.wfs) def prompt_revision(self): - """Prompt for pipeline revision / branch""" - # Prompt user for revision tag if '--revision' was not set - # If --tower is specified, allow to select multiple revisions - + """ + Prompt for pipeline revision / branch + Prompt user for revision tag if '--revision' was not set + If --tower is specified, allow to select multiple revisions + Also the classic download allows for multiple revisions, but + """ if not bool(self.revision): (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( self.wf_revisions, self.wf_branches, multiple=self.tower ) + """ + The checkbox() prompt unfortunately does not support passing a Validator, + so a user who keeps pressing Enter will flounder past the selection without choice. - # The checkbox() prompt unfortunately does not support passing a Validator, - # so a user who keeps pressing Enter will bump through the selection without choice. - - # bool(choice), bool(tag_set): + bool(choice), bool(tag_set): ############################# - # True, True: A choice was made and revisions were available. 
- # False, True: No selection was made, but revisions were available -> defaults to all available. - # False, False: No selection was made because no revisions were available -> raise AssertionError. - # True, False: Congratulations, you found a bug! That combo shouldn't happen. + True, True: A choice was made and revisions were available. + False, True: No selection was made, but revisions were available -> defaults to all available. + False, False: No selection was made because no revisions were available -> raise AssertionError. + True, False: Congratulations, you found a bug! That combo shouldn't happen. + """ if bool(choice): # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. @@ -351,10 +359,14 @@ def get_revision_hash(self): def prompt_config_inclusion(self): """Prompt for inclusion of institutional configurations""" - self.include_configs = questionary.confirm( - "Include the nf-core's default institutional configuration files into the download?", - style=nf_core.utils.nfcore_question_style, - ).ask() + if stderr.is_interactive: # Use rich auto-detection of interactive shells + self.include_configs = questionary.confirm( + "Include the nf-core's default institutional configuration files into the download?", + style=nf_core.utils.nfcore_question_style, + ).ask() + else: + self.include_configs = False + # do not include by default. 
def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -367,7 +379,7 @@ def prompt_container_download(self): style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_use_singularity_cachedir(self): + def prompt_singularity_cachedir_creation(self): """Prompt about using $NXF_SINGULARITY_CACHEDIR if not already set""" if ( self.container == "singularity" @@ -381,6 +393,7 @@ def prompt_use_singularity_cachedir(self): if rich.prompt.Confirm.ask( "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]" ): + self.singularity_cache == "amend" # Prompt user for a cache directory path cachedir_path = None while cachedir_path is None: @@ -425,25 +438,89 @@ def prompt_use_singularity_cachedir(self): "You will need reload your terminal after the download completes for this to take effect." ) - def prompt_singularity_cachedir_only(self): + def prompt_singularity_cachedir_utilization(self): """Ask if we should *only* use $NXF_SINGULARITY_CACHEDIR without copying into target""" if ( - self.singularity_cache_only is None + self.singularity_cache is None # no choice regarding singularity cache has been made. and self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None ): stderr.print( - "\nIf you are working on the same system where you will run Nextflow, you can leave the downloaded images in the " - "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them. " + "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" + "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them." "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." 
) - self.singularity_cache_only = rich.prompt.Confirm.ask( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder?[/]" + self.singularity_cache = rich.prompt.Prompt.ask( + "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the collection?[/]", + choices=["amend", "copy"], ) - # Sanity check, for when passed as a cli flag - if self.singularity_cache_only and self.container != "singularity": - raise AssertionError("Command has '--singularity-cache-only' set, but '--container' is not 'singularity'") + def prompt_singularity_cachedir_remote(self, retry): + """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" + if ( + self.container == "singularity" + and self.singularity_cache == "remote" + and self.singularity_cache_index is None + and stderr.is_interactive # Use rich auto-detection of interactive shells + ): + stderr.print( + "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " + "This allows downloaded images to be cached in a central location." 
+ ) + # Prompt user for a file listing the contents of the remote cache directory + cachedir_index = None + while cachedir_index is None: + prompt_cachedir_index = questionary.path( + "Specify a list of the remote images already present in the remote system :", + file_filter="*.txt", + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) + if prompt_cachedir_index == "": + log.error("Will disregard contents of a remote [blue]$NXF_SINGULARITY_CACHEDIR[/]") + self.singularity_cache_index = None + self.singularity_cache = "copy" + elif not os.access(cachedir_index, os.R_OK): + log.error(f"'{cachedir_index}' is not a valid, readable file.") + cachedir_index = None + if cachedir_index: + self.singularity_cache_index = cachedir_index + if retry: # invoke parsing the file again. + self.read_remote_containers() + + def read_remote_containers(self): + """Reads the file specified as index for the remote Singularity cache dir""" + if ( + self.container == "singularity" + and self.singularity_cache == "remote" + and self.singularity_cache_index is not None + ): + n_total_images = 0 + try: + with open(self.singularity_cache_index) as indexfile: + for line in indexfile.readlines(): + match = re.search(r"([^\/\\]+\.img)", line, re.S) + if match: + n_total_images += 1 + self.containers_remote.append(match.group(0)) + if n_total_images == 0: + raise LookupError("Could not find valid container names in the index file.") + else: + log.info( + f"Successfully read {n_total_images} containers from the remote $NXF_SINGULARITY_CACHE contents." 
+ ) + self.containers_remote = sorted(list(set(self.containers_remote))) + except (FileNotFoundError, LookupError) as e: + log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") + if rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): + self.prompt_singularity_cachedir_remote(retry=True) + else: + log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") + self.singularity_cache_index = None + if os.environ.get("NXF_SINGULARITY_CACHEDIR"): + self.singularity_cache = "copy" # default to copy if possible, otherwise skip. + else: + self.singularity_cache = None def prompt_compression_type(self): """Ask user if we should compress the downloaded files""" @@ -531,7 +608,7 @@ def wf_use_local_configs(self, revision_dirname): nfconfig = nfconfig.replace(find_str, repl_str) # Append the singularity.cacheDir to the end if we need it - if self.container == "singularity" and not self.singularity_cache_only: + if self.container == "singularity" and self.singularity_cache == "copy": nfconfig += ( f"\n\n// Added by `nf-core download` v{nf_core.__version__} //\n" + 'singularity.cacheDir = "${projectDir}/../singularity-images/"' @@ -674,8 +751,14 @@ def get_singularity_images(self): if len(self.containers) == 0: log.info("No container names found in workflow") else: + log.info( + f"Found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in workflow." 
+ ) + with DownloadProgress() as progress: - task = progress.add_task("all_containers", total=len(self.containers), progress_type="summary") + task = progress.add_task( + "Collecting container images", total=len(self.containers), progress_type="summary" + ) # Organise containers based on what we need to do with them containers_exist = [] @@ -697,8 +780,8 @@ def get_singularity_images(self): log.debug(f"Cache directory not found, creating: {cache_path_dir}") os.makedirs(cache_path_dir) - # We already have the target file in place, return - if os.path.exists(out_path): + # We already have the target file in place or in remote cache, return + if os.path.exists(out_path) or os.path.basename(out_path) in self.containers_remote: containers_exist.append(container) continue @@ -722,56 +805,62 @@ def get_singularity_images(self): "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" ) - # Go through each method of fetching containers in order - for container in containers_exist: - progress.update(task, description="Image file exists") - progress.update(task, advance=1) - - for container in containers_cache: - progress.update(task, description="Copying singularity images from cache") - self.singularity_copy_cache_image(*container) - progress.update(task, advance=1) - - with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: - progress.update(task, description="Downloading singularity images") - - # Kick off concurrent downloads - future_downloads = [ - pool.submit(self.singularity_download_image, *container, progress) - for container in containers_download - ] - - # Make ctrl-c work with multi-threading - self.kill_with_fire = False - - try: - # Iterate over each threaded download, waiting for them to finish - for future in concurrent.futures.as_completed(future_downloads): - future.result() - try: - progress.update(task, advance=1) - except Exception as e: - log.error(f"Error updating progress bar: {e}") - 
- except KeyboardInterrupt: - # Cancel the future threads that haven't started yet - for future in future_downloads: - future.cancel() - # Set the variable that the threaded function looks for - # Will trigger an exception from each thread - self.kill_with_fire = True - # Re-raise exception on the main thread - raise - - for container in containers_pull: - progress.update(task, description="Pulling singularity images") - try: - self.singularity_pull_image(*container, progress) - except RuntimeWarning as r: - # Raise exception if this is not possible - log.error("Not able to pull image. Service might be down or internet connection is dead.") - raise r - progress.update(task, advance=1) + if containers_exist: + if self.singularity_cache_index is not None: + log.info(f"{len(containers_exist)} are already cached remotely and won't be retrieved.") + # Go through each method of fetching containers in order + for container in containers_exist: + progress.update(task, description="Image file exists at destination") + progress.update(task, advance=1) + + if containers_cache: + for container in containers_cache: + progress.update(task, description="Copying singularity images from cache") + self.singularity_copy_cache_image(*container) + progress.update(task, advance=1) + + if containers_download or containers_pull: + # if clause gives slightly better UX, because Download is no longer displayed if nothing is left to be downloaded. 
+ with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: + progress.update(task, description="Downloading singularity images") + + # Kick off concurrent downloads + future_downloads = [ + pool.submit(self.singularity_download_image, *container, progress) + for container in containers_download + ] + + # Make ctrl-c work with multi-threading + self.kill_with_fire = False + + try: + # Iterate over each threaded download, waiting for them to finish + for future in concurrent.futures.as_completed(future_downloads): + future.result() + try: + progress.update(task, advance=1) + except Exception as e: + log.error(f"Error updating progress bar: {e}") + + except KeyboardInterrupt: + # Cancel the future threads that haven't started yet + for future in future_downloads: + future.cancel() + # Set the variable that the threaded function looks for + # Will trigger an exception from each thread + self.kill_with_fire = True + # Re-raise exception on the main thread + raise + + for container in containers_pull: + progress.update(task, description="Pulling singularity images") + try: + self.singularity_pull_image(*container, progress) + except RuntimeWarning as r: + # Raise exception if this is not possible + log.error("Not able to pull image. Service might be down or internet connection is dead.") + raise r + progress.update(task, advance=1) def singularity_image_filenames(self, container): """Check Singularity cache for image, copy to destination folder if found. 
@@ -810,11 +899,11 @@ def singularity_image_filenames(self, container): if os.environ.get("NXF_SINGULARITY_CACHEDIR"): cache_path = os.path.join(os.environ["NXF_SINGULARITY_CACHEDIR"], out_name) # Use only the cache - set this as the main output path - if self.singularity_cache_only: + if self.singularity_cache == "amend": out_path = cache_path cache_path = None - elif self.singularity_cache_only: - raise FileNotFoundError("'--singularity-cache' specified but no '$NXF_SINGULARITY_CACHEDIR' set!") + elif self.singularity_cache in ["amend", "copy"]: + raise FileNotFoundError("Singularity cache is required but no '$NXF_SINGULARITY_CACHEDIR' set!") return (out_path, cache_path) @@ -998,7 +1087,6 @@ def __init__( remote_url, revision, commit, - no_pull=False, hide_progress=False, in_cache=True, ): @@ -1028,6 +1116,7 @@ def __init__( self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.retries = 0 # retries for setting up the locally cached repository + self.hide_progress = hide_progress self.setup_local_repo(remote_url, in_cache=in_cache) @@ -1089,7 +1178,7 @@ def setup_local_repo(self, remote, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, ) with pbar: self.repo = git.Repo.clone_from( @@ -1112,7 +1201,7 @@ def setup_local_repo(self, remote, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, ) with pbar: self.repo.remotes.origin.fetch( From d729bde26b96b0131048159ace4bc5e1867a30bf Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 26 Apr 2023 21:05:55 +0200 Subject: [PATCH 31/42] Readme updates for the 
new remote Singularity cache feature. --- README.md | 18 ++++++++++-------- nf_core/download.py | 22 +++++++++++----------- tests/test_cli.py | 6 ++++-- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 13d8b381a3..28c764a09a 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ A python package with helper tools for the nf-core community. - [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) - [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) -- [`nf-core download --tower` - Download pipeline for Tower](#downloading-pipelines-for-tower) +- [`nf-core download --tower` - Adapting downloads to Nextflow Tower](#adapting-downloads-to-nextflow-tower) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) @@ -349,13 +349,13 @@ nextflow run /path/to/download/nf-core-rnaseq-dev/workflow/ --input mydata.csv - ### Downloaded nf-core configs The pipeline files are automatically updated (`params.custom_config_base` is set to `../configs`), so that the local copy of institutional configs are available when running the pipeline. -So using `-profile ` should work if available within [nf-core/configs](https://github.com/nf-core/configs). +So using `-profile ` should work if available within [nf-core/configs](https://github.com/nf-core/configs). This option is not available when downloading a pipeline for use with [Nextflow Tower](#adapting-downloads-to-nextflow-tower) because the application manages all configurations separately. 
### Downloading singularity containers If you're using Singularity, the `nf-core download` command can also fetch the required Singularity container images for you. To do this, select `singularity` in the prompt or specify `--container singularity` in the command. -Your archive / target output directory will then include three folders: `workflow`, `configs` and also `singularity-containers`. +Your archive / target output directory will then also include a separate folder `singularity-containers`. The downloaded workflow files are again edited to add the following line to the end of the pipeline's `nextflow.config` file: @@ -373,11 +373,13 @@ We highly recommend setting the `$NXF_SINGULARITY_CACHEDIR` environment variable If found, the tool will fetch the Singularity images to this directory first before copying to the target output archive / directory. Any images previously fetched will be found there and copied directly - this includes images that may be shared with other pipelines or previous pipeline version downloads or download attempts. -If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache-only` / `--singularity-cache-copy`. +If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. 
+If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`. To avoid unnecessary downloads, choose `--singularity-cache remote` and provide a list of already available images as plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. + #### How the Singularity image downloads work The Singularity image download finds containers using two methods: @@ -392,13 +394,13 @@ Where both are found, the download URL is preferred. Once a full list of containers is found, they are processed in the following order: -1. If the target image already exists, nothing is done (eg. with `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache-only` specified) -2. If found in `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache-only` is _not_ specified, they are copied to the output directory +1. If the target image already exists, nothing is done (eg. with `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache amend` specified) +2. If found in `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache copy` is specified, they are copied to the output directory 3. If they start with `http` they are downloaded directly within Python (default 4 at a time, you can customise this with `--parallel-downloads`) 4. If they look like a Docker image name, they are fetched using a `singularity pull` command - - This requires Singularity to be installed on the system and is substantially slower + - This requires Singularity/Apptainer to be installed on the system and is substantially slower -Note that compressing many GBs of binary files can be slow, so specifying `--compress none` is recommended when downloading Singularity images. 
+Note that compressing many GBs of binary files can be slow, so specifying `--compress none` is recommended when downloading Singularity images that are copied to the output directory.

 If the download speeds are much slower than your internet connection is capable of, you can set `--parallel-downloads` to a large number to download loads of images at once.

diff --git a/nf_core/download.py b/nf_core/download.py
index 59e7f47d3f..8297eb3f5c 100644
--- a/nf_core/download.py
+++ b/nf_core/download.py
@@ -141,11 +141,12 @@ def download_workflow(self):
         # Inclusion of configs is unnecessary for Tower.
         if not self.tower and self.include_configs is None:
             self.prompt_config_inclusion()
+        # If a remote cache is specified, it is safe to assume images should be downloaded.
         if not self.singularity_cache == "remote":
             self.prompt_container_download()
-            self.prompt_singularity_cachedir_creation()
         else:
             self.container = "singularity"
+        self.prompt_singularity_cachedir_creation()
         self.prompt_singularity_cachedir_utilization()
         self.prompt_singularity_cachedir_remote(retry=False)
         self.read_remote_containers()
@@ -371,7 +372,7 @@ def prompt_config_inclusion(self):

     def prompt_container_download(self):
         """Prompt whether to download container images or not"""
-        if self.container is None:
+        if self.container is None and stderr.is_interactive:
             stderr.print("\nIn addition to the pipeline code, this tool can download software containers.")
             self.container = questionary.select(
                 "Download software container images:",
@@ -393,7 +394,8 @@ def prompt_singularity_cachedir_creation(self):
         if rich.prompt.Confirm.ask(
             "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]"
         ):
-            self.singularity_cache == "amend"
+            if not self.singularity_cache_index:
+                self.singularity_cache = "amend"  # retain "remote" choice.
# Prompt user for a cache directory path cachedir_path = None while cachedir_path is None: @@ -419,7 +421,7 @@ def prompt_singularity_cachedir_creation(self): if bashrc_path: stderr.print( f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(bashrc_path)}[/] file ." - "This will then be autmoatically set every time you open a new terminal. We can add the following line to this file for you: \n" + "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' ) append_to_file = rich.prompt.Confirm.ask( @@ -444,16 +446,18 @@ def prompt_singularity_cachedir_utilization(self): self.singularity_cache is None # no choice regarding singularity cache has been made. and self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None + and stderr.is_interactive ): stderr.print( "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them." "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." 
) - self.singularity_cache = rich.prompt.Prompt.ask( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the collection?[/]", + self.singularity_cache = questionary.select( + "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the cache?[/]", choices=["amend", "copy"], - ) + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() def prompt_singularity_cachedir_remote(self, retry): """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" @@ -463,10 +467,6 @@ def prompt_singularity_cachedir_remote(self, retry): and self.singularity_cache_index is None and stderr.is_interactive # Use rich auto-detection of interactive shells ): - stderr.print( - "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " - "This allows downloaded images to be cached in a central location." 
- ) # Prompt user for a file listing the contents of the remote cache directory cachedir_index = None while cachedir_index is None: diff --git a/tests/test_cli.py b/tests/test_cli.py index 6f51fe1025..873b7d4b0c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -167,7 +167,8 @@ def test_cli_download(self, mock_dl): "force": None, "tower": None, "container": "singularity", - "singularity-cache-only": None, + "singularity-cache": "copy", + "singularity-cache-index": "/path/index.txt", "parallel-downloads": 2, } @@ -184,7 +185,8 @@ def test_cli_download(self, mock_dl): "force" in params, "tower" in params, params["container"], - "singularity-cache-only" in params, + params["singularity-cache"], + params["singularity-cache-index"], params["parallel-downloads"], ) From 0f58c29352c816056045450cac90ed7e8e6bfdc6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 11:49:21 +0200 Subject: [PATCH 32/42] Add interactive check in retry for parsing the index. --- nf_core/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8297eb3f5c..76a3f00054 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -512,7 +512,7 @@ def read_remote_containers(self): self.containers_remote = sorted(list(set(self.containers_remote))) except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): + if stderr.is_interactive and rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): self.prompt_singularity_cachedir_remote(retry=True) else: log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") From 6294d74559cabebad29d6cb08d3aed6b4c57756b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 12:54:16 +0200 Subject: [PATCH 33/42] Incorporating some suggestions by 
@mashehu. --- nf_core/download.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 76a3f00054..b8ce5a1607 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -148,8 +148,7 @@ def download_workflow(self): self.container = "singularity" self.prompt_singularity_cachedir_creation() self.prompt_singularity_cachedir_utilization() - self.prompt_singularity_cachedir_remote(retry=False) - self.read_remote_containers() + self.prompt_singularity_cachedir_remote() # Nothing meaningful to compress here. if not self.tower: self.prompt_compression_type() @@ -177,8 +176,8 @@ def download_workflow(self): if not self.tower: # Only show entry, if option was prompted. summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") - - summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") + else: + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") # Check that the outdir doesn't already exist if os.path.exists(self.outdir): @@ -203,9 +202,9 @@ def download_workflow(self): if self.tower: self.download_workflow_tower() else: - self.download_workflow_classic() + self.download_workflow_static() - def download_workflow_classic(self): + def download_workflow_static(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the centralised configs first @@ -290,7 +289,8 @@ def prompt_revision(self): Prompt for pipeline revision / branch Prompt user for revision tag if '--revision' was not set If --tower is specified, allow to select multiple revisions - Also the classic download allows for multiple revisions, but + Also the static download allows for multiple revisions, but + we do not prompt this option interactively. 
""" if not bool(self.revision): (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( @@ -459,7 +459,7 @@ def prompt_singularity_cachedir_utilization(self): style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_singularity_cachedir_remote(self, retry): + def prompt_singularity_cachedir_remote(self): """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" if ( self.container == "singularity" @@ -485,8 +485,8 @@ def prompt_singularity_cachedir_remote(self, retry): cachedir_index = None if cachedir_index: self.singularity_cache_index = cachedir_index - if retry: # invoke parsing the file again. - self.read_remote_containers() + # in any case read the remote containers, even if no prompt was shown. + self.read_remote_containers() def read_remote_containers(self): """Reads the file specified as index for the remote Singularity cache dir""" From 8d327a46480dc55ea0e4fcf7e06d31d8b58c609f Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Thu, 27 Apr 2023 12:58:46 +0200 Subject: [PATCH 34/42] Apply suggestions from code review @mashehu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- nf_core/__main__.py | 4 ++-- nf_core/download.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 46e9ac0988..8b94e64715 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -213,14 +213,14 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-r", "--revision", multiple=True, - help="Pipeline release to download. Multiple invocations are possible.", + help="Pipeline release to download. Multiple invocations are possible, e.g. 
`-r 1.1 -r 1.2.", ) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") -@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for seqeralabs® Nextflow Tower") +@click.option("-t", "--tower", is_flag=True, default=False, help="Download for seqeralabs® Nextflow Tower") @click.option( "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) diff --git a/nf_core/download.py b/nf_core/download.py index b8ce5a1607..8832eca5d3 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -242,7 +242,7 @@ def download_workflow_static(self): self.compress_download() def download_workflow_tower(self): - """Create a bare-cloned git repository of the workflow, such it can be launched with `tw launch` as file:/ pipeline""" + """Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") @@ -481,7 +481,7 @@ def prompt_singularity_cachedir_remote(self): self.singularity_cache_index = None self.singularity_cache = "copy" elif not os.access(cachedir_index, os.R_OK): - log.error(f"'{cachedir_index}' is not a valid, readable file.") + log.error(f"'{cachedir_index}' is not a readable file.") cachedir_index = None if cachedir_index: self.singularity_cache_index = cachedir_index From 340c5195067809b27d1f1da5273d9d2ca99bafc3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 20:12:15 +0200 Subject: [PATCH 35/42] Writing additional tests for the --tower download functionality. 
--- .../workflows/pytest-frozen-ubuntu-20.04.yml | 2 +- CHANGELOG.md | 1 + nf_core/__main__.py | 2 +- nf_core/download.py | 95 +++++++++++-------- tests/data/testdata_remote_containers.txt | 37 ++++++++ tests/test_download.py | 91 +++++++++++++++++- 6 files changed, 188 insertions(+), 40 deletions(-) create mode 100644 tests/data/testdata_remote_containers.txt diff --git a/.github/workflows/pytest-frozen-ubuntu-20.04.yml b/.github/workflows/pytest-frozen-ubuntu-20.04.yml index b015376633..5faf8ce605 100644 --- a/.github/workflows/pytest-frozen-ubuntu-20.04.yml +++ b/.github/workflows/pytest-frozen-ubuntu-20.04.yml @@ -15,7 +15,7 @@ concurrency: cancel-in-progress: true jobs: - pytest: + pytest-frozen: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index d880e0105d..513f1b014e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ ### Download - Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). +- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download). `remote` was newly introduced and allows to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). ### Linting diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 8b94e64715..6d6ded471a 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -213,7 +213,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-r", "--revision", multiple=True, - help="Pipeline release to download. Multiple invocations are possible, e.g. 
`-r 1.1 -r 1.2.", + help="Pipeline release to download. Multiple invocations are possible, e.g. `-r 1.1 -r 1.2`", ) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( diff --git a/nf_core/download.py b/nf_core/download.py index 8832eca5d3..db98b17f22 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -108,7 +108,7 @@ def __init__( self.force = force self.tower = tower self.include_configs = None - self.container = container + self.container = container if not singularity_cache_index else "singularity" self.singularity_cache = ( singularity_cache if not singularity_cache_index else "remote" ) # if a singularity_cache_index is given, use the file and overrule choice. @@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...['+str(len(self.revision)-2)+' more revisions]...,'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -228,28 +228,29 @@ def download_workflow_static(self): # Collect all required singularity images if self.container == "singularity": - self.find_container_images(revision_dirname) + self.find_container_images(os.path.join(self.outdir, revision_dirname)) - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + try: + self.get_singularity_images(current_revision=item[0]) + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Compress into an archive if self.compress_type is not None: log.info("Compressing output into archive") self.compress_download() - def download_workflow_tower(self): + def download_workflow_tower(self, location=None): 
"""Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo( - remote_url=f"git@github.com:{self.pipeline}.git", + remote_url=f"https://github.com/{self.pipeline}.git", revision=self.revision if self.revision else None, commit=self.wf_sha.values() if bool(self.wf_sha) else None, + location=location if location else None, # manual location is required for the tests to work in_cache=False, ) @@ -261,17 +262,17 @@ def download_workflow_tower(self): # extract the required containers if self.container == "singularity": - for commit in self.wf_sha.values(): + for revision, commit in self.wf_sha.items(): # Checkout the repo in the current revision self.workflow_repo.checkout(commit) # Collect all required singularity images self.find_container_images(self.workflow_repo.access()) - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + try: + self.get_singularity_images(current_revision=revision) + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) if self.compress_type is not None: @@ -412,30 +413,47 @@ def prompt_singularity_cachedir_creation(self): if cachedir_path: os.environ["NXF_SINGULARITY_CACHEDIR"] = cachedir_path - # Ask if user wants this set in their .bashrc - bashrc_path = os.path.expanduser("~/.bashrc") - if not os.path.isfile(bashrc_path): - bashrc_path = os.path.expanduser("~/.bash_profile") - if not os.path.isfile(bashrc_path): - bashrc_path = False - if bashrc_path: + """ + Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. + Currently support for bash and zsh. + ToDo: "sh", "bash", "dash", "ash","csh", "tcsh", "ksh", "zsh", "fish", "cmd", "powershell", "pwsh"? 
+ """ + + if os.environ["SHELL"] == "/bin/bash": + shellprofile_path = os.path.expanduser("~/~/.bash_profile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = os.path.expanduser("~/.bashrc") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + elif os.environ["SHELL"] == "/bin/zsh": + shellprofile_path = os.path.expanduser("~/.zprofile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = os.path.expanduser("~/.zshenv") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + else: + shellprofile_path = os.path.expanduser("~/.profile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + + if shellprofile_path: stderr.print( - f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(bashrc_path)}[/] file ." + f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." "This will then be automatically set every time you open a new terminal. 
We can add the following line to this file for you: \n" f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' ) append_to_file = rich.prompt.Confirm.ask( - f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(bashrc_path)}[/] ?[/]" + f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(shellprofile_path)}[/] ?[/]" ) if append_to_file: - with open(os.path.expanduser(bashrc_path), "a") as f: + with open(os.path.expanduser(shellprofile_path), "a") as f: f.write( "\n\n#######################################\n" f"## Added by `nf-core download` v{nf_core.__version__} ##\n" + f'export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"' + "\n#######################################\n" ) - log.info(f"Successfully wrote to [blue]{bashrc_path}[/]") + log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]") log.warning( "You will need reload your terminal after the download completes for this to take effect." ) @@ -620,7 +638,7 @@ def wf_use_local_configs(self, revision_dirname): with open(nfconfig_fn, "w") as nfconfig_fh: nfconfig_fh.write(nfconfig) - def find_container_images(self, revision_dirname): + def find_container_images(self, workflow_directory): """Find container image names for workflow. Starts by using `nextflow config` to pull out any process.container @@ -662,7 +680,7 @@ def find_container_images(self, revision_dirname): containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config - self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, revision_dirname)) + self.nf_config = nf_core.utils.fetch_wf_config(workflow_directory) # Find any config variables that look like a container for k, v in self.nf_config.items(): @@ -670,7 +688,7 @@ def find_container_images(self, revision_dirname): containers_raw.append(v.strip('"').strip("'")) # Recursive search through any DSL2 module files for container spec lines. 
- for subdir, _, files in os.walk(os.path.join(self.outdir, revision_dirname, "modules")): + for subdir, _, files in os.walk(os.path.join(workflow_directory, "modules")): for file in files: if file.endswith(".nf"): file_path = os.path.join(subdir, file) @@ -745,14 +763,14 @@ def find_container_images(self, revision_dirname): # Remove duplicates and sort self.containers = sorted(list(set(containers_raw))) - def get_singularity_images(self): + def get_singularity_images(self, current_revision=""): """Loop through container names and download Singularity images""" if len(self.containers) == 0: log.info("No container names found in workflow") else: log.info( - f"Found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in workflow." + f"Processing workflow revision {current_revision}, found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in total." ) with DownloadProgress() as progress: @@ -1087,6 +1105,7 @@ def __init__( remote_url, revision, commit, + location=None, hide_progress=False, in_cache=True, ): @@ -1118,7 +1137,7 @@ def __init__( self.retries = 0 # retries for setting up the locally cached repository self.hide_progress = hide_progress - self.setup_local_repo(remote_url, in_cache=in_cache) + self.setup_local_repo(remote=remote_url, location=location, in_cache=in_cache) # expose some instance attributes self.tags = self.repo.tags @@ -1155,7 +1174,7 @@ def retry_setup_local_repo(self, skip_confirm=False): else: raise LookupError("Exiting due to error with locally cached Git repository.") - def setup_local_repo(self, remote, in_cache=True): + def setup_local_repo(self, remote, location=None, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -1163,13 +1182,15 @@ def setup_local_repo(self, remote, in_cache=True): Args: remote (str): git url of remote - commit (str): name of branch to checkout from (optional) - hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + location (Path): location where the clone should be created/cached. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. Sets self.repo """ + if location: + self.local_repo_dir = os.path.join(location, self.fullname) + else: + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) - self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) try: if not os.path.exists(self.local_repo_dir): try: diff --git a/tests/data/testdata_remote_containers.txt b/tests/data/testdata_remote_containers.txt new file mode 100644 index 0000000000..93cf46f2f6 --- /dev/null +++ b/tests/data/testdata_remote_containers.txt @@ -0,0 +1,37 @@ +./depot.galaxyproject.org-singularity-bbmap-38.93--he522d1c_0.img +./depot.galaxyproject.org-singularity-bedtools-2.30.0--hc088bd4_0.img +./depot.galaxyproject.org-singularity-bioconductor-dupradar-1.18.0--r40_1.img +./depot.galaxyproject.org-singularity-bioconductor-summarizedexperiment-1.20.0--r40_0.img +./depot.galaxyproject.org-singularity-bioconductor-tximeta-1.8.0--r40_0.img +./depot.galaxyproject.org-singularity-fastqc-0.11.9--0.img +./depot.galaxyproject.org-singularity-gffread-0.12.1--h8b12597_0.img +./depot.galaxyproject.org-singularity-hisat2-2.2.1--h1b792b2_3.img +./depot.galaxyproject.org-singularity-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-59cdd445419f14abac76b31dd0d71217994cbcc9-0.img +./depot.galaxyproject.org-singularity-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0.img 
+./depot.galaxyproject.org-singularity-mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91-ab110436faf952a33575c64dd74615a84011450b-0.img +./depot.galaxyproject.org-singularity-mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1-0e773bb207600fcb4d38202226eb20a33c7909b6-0.img +./depot.galaxyproject.org-singularity-mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1-38aed4501da19db366dc7c8d52d31d94e760cfaf-0.img +./depot.galaxyproject.org-singularity-mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b-64aad4a4e144878400649e71f42105311be7ed87-0.img +./depot.galaxyproject.org-singularity-multiqc-1.11--pyhdfd78af_0.img +./depot.galaxyproject.org-singularity-multiqc-1.13--pyhdfd78af_0.img +./depot.galaxyproject.org-singularity-perl-5.26.2.img +./depot.galaxyproject.org-singularity-picard-2.26.10--hdfd78af_0.img +./depot.galaxyproject.org-singularity-picard-2.27.4--hdfd78af_0.img +./depot.galaxyproject.org-singularity-preseq-3.1.2--h445547b_2.img +./depot.galaxyproject.org-singularity-python-3.9--1.img +./depot.galaxyproject.org-singularity-qualimap-2.2.2d--1.img +./depot.galaxyproject.org-singularity-rseqc-3.0.1--py37h516909a_1.img +./depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img +./depot.galaxyproject.org-singularity-samtools-1.15.1--h1170115_0.img +./depot.galaxyproject.org-singularity-sortmerna-4.3.4--h9ee0642_0.img +./depot.galaxyproject.org-singularity-stringtie-2.2.1--hecb563c_2.img +./depot.galaxyproject.org-singularity-subread-2.0.1--hed695b0_0.img +./depot.galaxyproject.org-singularity-trim-galore-0.6.7--hdfd78af_0.img +./depot.galaxyproject.org-singularity-ubuntu-20.04.img +./depot.galaxyproject.org-singularity-ucsc-bedclip-377--h0b8a92a_2.img +./depot.galaxyproject.org-singularity-ucsc-bedgraphtobigwig-377--h446ed27_1.img +./depot.galaxyproject.org-singularity-umi_tools-1.1.2--py38h4a8c8d9_0.img +These entries should not be used: +On October 5, 2011, the 224-meter containership MV Rena struck a reef close to New Zealand’s coast and broke apart. 
That spells disaster, no? +MV Rena + diff --git a/tests/test_download.py b/tests/test_download.py index d1a770a630..41fb9c625f 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -3,16 +3,20 @@ import hashlib import os +import re import shutil import tempfile import unittest +from pathlib import Path from unittest import mock import pytest import nf_core.create import nf_core.utils -from nf_core.download import DownloadWorkflow +from nf_core.download import DownloadWorkflow, WorkflowRepo +from nf_core.synced_repo import SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR from .utils import with_temporary_file, with_temporary_folder @@ -169,6 +173,32 @@ def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_ri with pytest.raises(OSError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) + # + # Test for '--singularity-cache remote --singularity-cache-index'. Provide a list of containers already available in a remote location. + # + @with_temporary_folder + def test_remote_container_functionality(self, tmp_dir): + os.environ["NXF_SINGULARITY_CACHEDIR"] = "foo" + + download_obj = DownloadWorkflow( + pipeline="nf-core/rnaseq", + outdir=os.path.join(tmp_dir, "new"), + revision="3.9", + compress_type="none", + singularity_cache_index=Path(__file__).resolve().parent / "data/testdata_remote_containers.txt", + ) + + download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. + + # test if settings are changed accordingly. 
+ assert download_obj.singularity_cache == "remote" and download_obj.container == "singularity" + assert isinstance(download_obj.containers_remote, list) and len(download_obj.containers_remote) == 0 + # read in the file + download_obj.read_remote_containers() + assert len(download_obj.containers_remote) == 33 + assert "depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img" in download_obj.containers_remote + assert "MV Rena" not in download_obj.containers_remote # decoy in test file + # # Tests for the main entry method 'download_workflow' # @@ -184,6 +214,65 @@ def test_download_workflow_with_success(self, tmp_dir, mock_download_image, mock container="singularity", revision="1.6", compress_type="none", + singularity_cache="copy", ) + download_obj.include_configs = True # suppress prompt, because stderr.is_interactive doesn't. download_obj.download_workflow() + + # + # Test Download for Tower + # + @with_temporary_folder + def test_download_workflow_for_tower(self, tmp_dir): + download_obj = DownloadWorkflow( + pipeline="nf-core/rnaseq", + revision=("3.7", "3.9"), + compress_type="none", + tower=True, + ) + + download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. 
+ + assert isinstance(download_obj.revision, list) and len(download_obj.revision) == 2 + assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 0 + assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0 + + wfs = nf_core.list.Workflows() + wfs.get_remote_workflows() + ( + download_obj.pipeline, + download_obj.wf_revisions, + download_obj.wf_branches, + ) = nf_core.utils.get_repo_releases_branches(download_obj.pipeline, wfs) + + download_obj.get_revision_hash() + + # download_obj.wf_download_url is not set for tower downloads, but the sha values are + assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 2 + assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0 + + # The outdir for multiple revisions is the pipeline name and date: e.g. nf-core-rnaseq_2023-04-27_18-54 + assert bool(re.search(r"nf-core-rnaseq_\d{4}-\d{2}-\d{1,2}_\d{1,2}-\d{1,2}", download_obj.outdir, re.S)) + + download_obj.output_filename = f"{download_obj.outdir}.git" + download_obj.download_workflow_tower(location=tmp_dir) + + assert download_obj.workflow_repo + assert isinstance(download_obj.workflow_repo, WorkflowRepo) + assert issubclass(type(download_obj.workflow_repo), SyncedRepo) + # corroborate that the other revisions are inaccessible to the user. 
+ assert len(download_obj.workflow_repo.tags) == len(download_obj.revision) + + # manually test container image detection for 3.7 revision + download_obj.workflow_repo.checkout(download_obj.wf_sha["3.7"]) + assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 0 + download_obj.find_container_images(download_obj.workflow_repo.access()) + assert len(download_obj.containers) == 30 # 30 containers for 3.7 + assert ( + "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" in download_obj.containers + ) # direct definition + assert ( + "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" + in download_obj.containers + ) # indirect definition via $container variable. From f599237c21557e2692da7e05c232bfe4f290c5a0 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 2 May 2023 15:52:01 +0200 Subject: [PATCH 36/42] Move alterations from Version 2.8 (which this PR didn't make anymore) to Version 2.9dev. --- CHANGELOG.md | 9 ++++----- README.md | 7 ++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 513f1b014e..fde3775e70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ - Move registry definitions out of profile scope ([#2286])(https://github.com/nf-core/tools/pull/2286) - Remove `aws_tower` profile ([#2287])(https://github.com/nf-core/tools/pull/2287) - Fixed the Slack report to include the pipeline name ([#2291](https://github.com/nf-core/tools/pull/2291)) +### Download + +- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). +- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. 
The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download). `remote` was newly introduced and allows to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). ### Linting @@ -49,11 +53,6 @@ - Removed `quay.io` from all module Docker container references as this is now supplied at pipeline level. ([#2249](https://github.com/nf-core/tools/pull/2249)) - Remove `CITATION.cff` file from pipeline template, to avoid that pipeline Zenodo entries reference the nf-core publication instead of the pipeline ([#2059](https://github.com/nf-core/tools/pull/2059)). -### Download - -- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). -- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download). `remote` was newly introduced and allows to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). - ### Linting - Update modules lint test to fail if enable_conda is found ([#2213](https://github.com/nf-core/tools/pull/2213)) diff --git a/README.md b/README.md index 28c764a09a..06cae66c8e 100644 --- a/README.md +++ b/README.md @@ -373,12 +373,9 @@ We highly recommend setting the `$NXF_SINGULARITY_CACHEDIR` environment variable If found, the tool will fetch the Singularity images to this directory first before copying to the target output archive / directory. 
Any images previously fetched will be found there and copied directly - this includes images that may be shared with other pipelines or previous pipeline version downloads or download attempts. -If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. +If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. -This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. -The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. - -If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`. To avoid unnecessary downloads, choose `--singularity-cache remote` and provide a list of already available images as plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. +If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`. 
To avoid unnecessary container image downloads, choose `--singularity-cache remote` and provide a list of already available images as plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. The tool will then only download and copy images into your output directory, which are missing on the remote system. #### How the Singularity image downloads work From 2518a4bfda260064fae4b681777822e89d9e0586 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 2 May 2023 16:12:54 +0200 Subject: [PATCH 37/42] Adding the info about remote containers to the summary log rather than showing it separately. --- nf_core/download.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index db98b17f22..cfccf2a235 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -162,6 +162,10 @@ def download_workflow(self): ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") + if self.containers_remote: + summary_log.append( + f"Successfully read {len(self.containers_remote)} containers from the remote '[blue]$NXF_SINGULARITY_CACHEDIR[/]' contents." + ) # Set an output filename now that we have the outdir if self.tower: @@ -523,10 +527,6 @@ def read_remote_containers(self): self.containers_remote.append(match.group(0)) if n_total_images == 0: raise LookupError("Could not find valid container names in the index file.") - else: - log.info( - f"Successfully read {n_total_images} containers from the remote $NXF_SINGULARITY_CACHE contents." 
- ) self.containers_remote = sorted(list(set(self.containers_remote))) except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") @@ -825,7 +825,9 @@ def get_singularity_images(self, current_revision=""): if containers_exist: if self.singularity_cache_index is not None: - log.info(f"{len(containers_exist)} are already cached remotely and won't be retrieved.") + log.info( + f"{len(containers_exist)} containers are already cached remotely and won't be retrieved." + ) # Go through each method of fetching containers in order for container in containers_exist: progress.update(task, description="Image file exists at destination") From 4f390be96a7fc41f01443412fbb065933569336c Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 5 May 2023 11:48:07 +0200 Subject: [PATCH 38/42] Moved the notification about remote containers to summary_log. --- nf_core/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index cfccf2a235..8a65252e3b 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -161,10 +161,10 @@ def download_workflow(self): f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: - summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") + summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}'") if self.containers_remote: summary_log.append( - f"Successfully read {len(self.containers_remote)} containers from the remote '[blue]$NXF_SINGULARITY_CACHEDIR[/]' contents." + f"Successfully read {len(self.containers_remote)} containers from the remote '$NXF_SINGULARITY_CACHEDIR' contents." 
) # Set an output filename now that we have the outdir From f8e50684fd4eb5138205d2682ab5b2eaf1f3a3c6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Tue, 9 May 2023 15:33:40 +0200 Subject: [PATCH 39/42] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- nf_core/download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8a65252e3b..8705add039 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...['+str(len(self.revision)-2)+' more revisions]...,'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...,['+str(len(self.revision)-2)+' more revisions],...,'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -420,7 +420,7 @@ def prompt_singularity_cachedir_creation(self): """ Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. Currently support for bash and zsh. - ToDo: "sh", "bash", "dash", "ash","csh", "tcsh", "ksh", "zsh", "fish", "cmd", "powershell", "pwsh"? + ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? """ if os.environ["SHELL"] == "/bin/bash": @@ -476,7 +476,7 @@ def prompt_singularity_cachedir_utilization(self): "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." 
) self.singularity_cache = questionary.select( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the cache?[/]", + "Copy singularity images from $NXF_SINGULARITY_CACHEDIR to the target folder or amend new images to the cache?", choices=["amend", "copy"], style=nf_core.utils.nfcore_question_style, ).unsafe_ask() From e5128786de1dcabc6909ea0c0e36406812290298 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 15:46:26 +0200 Subject: [PATCH 40/42] Fixes suggested by @mirpedrol during review. Thanks! --- README.md | 3 +-- nf_core/download.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 06cae66c8e..dacb50ebc4 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,7 @@ A python package with helper tools for the nf-core community. - [`nf-core` tools update](#update-tools) - [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) -- [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) -- [`nf-core download --tower` - Adapting downloads to Nextflow Tower](#adapting-downloads-to-nextflow-tower) +- [`nf-core download` - Download a pipeline for offline use](#downloading-pipelines-for-offline-use) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) diff --git a/nf_core/download.py b/nf_core/download.py index 8705add039..5d214d2aaf 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -423,13 +423,13 @@ def prompt_singularity_cachedir_creation(self): ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? 
""" - if os.environ["SHELL"] == "/bin/bash": + if os.getenv("SHELL", "") == "/bin/bash": shellprofile_path = os.path.expanduser("~/~/.bash_profile") if not os.path.isfile(shellprofile_path): shellprofile_path = os.path.expanduser("~/.bashrc") if not os.path.isfile(shellprofile_path): shellprofile_path = False - elif os.environ["SHELL"] == "/bin/zsh": + elif os.getenv("SHELL", "") == "/bin/zsh": shellprofile_path = os.path.expanduser("~/.zprofile") if not os.path.isfile(shellprofile_path): shellprofile_path = os.path.expanduser("~/.zshenv") From 315b9a3536a1d7cdd661bafc3e7f0f1cc63051a7 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 17:00:47 +0200 Subject: [PATCH 41/42] @mashehu suggested that downloading the containers should not be optional for Tower downloads. Given that there is the option to provide the list of remote containers to skip their download, I agree that this is reasonable. --- CHANGELOG.md | 1 + nf_core/download.py | 12 ++++++------ nf_core/modules/lint/main_nf.py | 2 +- tests/test_download.py | 8 +++++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fde3775e70..89d3fe8f8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Move registry definitions out of profile scope ([#2286])(https://github.com/nf-core/tools/pull/2286) - Remove `aws_tower` profile ([#2287])(https://github.com/nf-core/tools/pull/2287) - Fixed the Slack report to include the pipeline name ([#2291](https://github.com/nf-core/tools/pull/2291)) + ### Download - Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). 
diff --git a/nf_core/download.py b/nf_core/download.py index 5d214d2aaf..274132f02b 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -108,10 +108,10 @@ def __init__( self.force = force self.tower = tower self.include_configs = None - self.container = container if not singularity_cache_index else "singularity" - self.singularity_cache = ( - singularity_cache if not singularity_cache_index else "remote" - ) # if a singularity_cache_index is given, use the file and overrule choice. + # force download of containers if a cache index is given or download is meant to be used for Tower. + self.container = "singularity" if singularity_cache_index or bool(tower) else container + # if a singularity_cache_index is given, use the file and overrule choice. + self.singularity_cache = "remote" if singularity_cache_index else singularity_cache self.singularity_cache_index = singularity_cache_index self.parallel_downloads = parallel_downloads @@ -377,7 +377,7 @@ def prompt_config_inclusion(self): def prompt_container_download(self): """Prompt whether to download container images or not""" - if self.container is None and stderr.is_interactive: + if self.container is None and stderr.is_interactive and not self.tower: stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") self.container = questionary.select( "Download software container images:", @@ -722,7 +722,7 @@ def find_container_images(self, workflow_directory): Therefore, we need to repeat the search over the contents, extract the variable name, and use it inside a new regex. To get the variable name ( ${container_id} in above example ), we match the literal word "container" and use lookbehind (reset the match). - Then we skip [^\${}]+ everything that is not $ or curly braces. The next capture group is + Then we skip [^${}]+ everything that is not $ or curly braces. 
The next capture group is ${ followed by any characters that are not curly braces [^{}]+ and ended by a closing curly brace (}), but only if it's not followed by any other curly braces (?![^{]*}). The latter ensures we capture the innermost variable name. diff --git a/nf_core/modules/lint/main_nf.py b/nf_core/modules/lint/main_nf.py index 8150e7e839..31b8adca3a 100644 --- a/nf_core/modules/lint/main_nf.py +++ b/nf_core/modules/lint/main_nf.py @@ -283,7 +283,7 @@ def check_process_section(self, lines, fix_version, progress_bar): self.failed.append(("docker_tag", "Unable to parse docker tag", self.main_nf)) docker_tag = NoneD if l.startswith("quay.io/"): - l_stripped = re.sub("\W+$", "", l) + l_stripped = re.sub(r"\W+$", "", l) self.failed.append( ( "container_links", diff --git a/tests/test_download.py b/tests/test_download.py index 41fb9c625f..aa2e959f3d 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -190,7 +190,7 @@ def test_remote_container_functionality(self, tmp_dir): download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. - # test if settings are changed accordingly. + # test if the settings are changed to mandatory defaults, if an external cache index is used. assert download_obj.singularity_cache == "remote" and download_obj.container == "singularity" assert isinstance(download_obj.containers_remote, list) and len(download_obj.containers_remote) == 0 # read in the file @@ -264,9 +264,11 @@ def test_download_workflow_for_tower(self, tmp_dir): # corroborate that the other revisions are inaccessible to the user. 
assert len(download_obj.workflow_repo.tags) == len(download_obj.revision) - # manually test container image detection for 3.7 revision + # download_obj.download_workflow_tower(location=tmp_dir) will run container image detection for all requested revisions + assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 33 + # manually test container image detection for 3.7 revision only + download_obj.containers = [] # empty container list for the test download_obj.workflow_repo.checkout(download_obj.wf_sha["3.7"]) - assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 0 download_obj.find_container_images(download_obj.workflow_repo.access()) assert len(download_obj.containers) == 30 # 30 containers for 3.7 assert ( From 6a806ee322e880db106e3fce8434d34527afca6b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 26 May 2023 23:52:43 +0200 Subject: [PATCH 42/42] Bugfix: WorkflowRepo.tidy_tags() did indeed only tidy tags. However, revisions may also be branches. Therefore, I rewrote this function to account for revisions that are not releases. 
--- nf_core/download.py | 51 +++++++++++++++++++++++++++++++----------- nf_core/synced_repo.py | 2 +- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 274132f02b..70f61f35a4 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -259,7 +259,7 @@ def download_workflow_tower(self, location=None): ) # Remove tags for those revisions that had not been selected - self.workflow_repo.tidy_tags() + self.workflow_repo.tidy_tags_and_branches() # create a bare clone of the modified repository needed for Tower self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) @@ -1157,6 +1157,9 @@ def access(self): def checkout(self, commit): return super().checkout(commit) + def get_remote_branches(self, remote_url): + return super().get_remote_branches(remote_url) + def retry_setup_local_repo(self, skip_confirm=False): self.retries += 1 if skip_confirm or rich.prompt.Confirm.ask( @@ -1236,29 +1239,51 @@ def setup_local_repo(self, remote, location=None, in_cache=True): log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() - def tidy_tags(self): + def tidy_tags_and_branches(self): """ - Function to delete all tags that point to revisions that are not of interest to the downloader. - This allows a clutter-free experience in Tower. The commits are evidently still available. + Function to delete all tags and branches that are not of interest to the downloader. + This allows a clutter-free experience in Tower. The untagged commits are evidently still available. However, due to local caching, the downloader might also want access to revisions that had been deleted before. In that case, don't bother with re-adding the tags and rather download anew from Github. 
""" if self.revision and self.repo and self.repo.tags: - desired_tags = self.revision.copy() + # create a set to keep track of the revisions to process & check + desired_revisions = set(self.revision) + + # determine what needs pruning + tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions} + heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions} + try: - for tag in self.repo.tags: - if tag.name not in self.revision: - self.repo.delete_tag(tag) - else: - desired_tags.remove(tag.name) + # delete unwanted tags from repository + for tag in tags_to_remove: + self.repo.delete_tag(tag) self.tags = self.repo.tags - if len(desired_tags) > 0: + + # switch to a revision that should be kept, because deleting heads fails, if they are checked out (e.g. "master") + self.checkout(self.revision[0]) + + # delete unwanted heads/branches from repository + for head in heads_to_remove: + self.repo.delete_head(head) + + # ensure all desired branches are available + for revision in desired_revisions: + self.checkout(revision) + self.heads = self.repo.heads + + # get all tags and available remote_branches + completed_revisions = {revision.name for revision in self.repo.heads + self.repo.tags} + + # verify that all requested revisions are available. + # a local cache might lack revisions that were deleted during a less comprehensive previous download. + if bool(desired_revisions - completed_revisions): log.info( - f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_tags)}. Downloading anew from GitHub..." + f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_revisions - completed_revisions)}. Downloading anew from GitHub..." 
) self.retry_setup_local_repo(skip_confirm=True) - self.tidy_tags() + self.tidy_tags_and_branches() except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") self.retry_setup_local_repo(skip_confirm=True) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 4bbd4f8443..f78142c031 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -196,7 +196,7 @@ def branch_exists(self): def verify_branch(self): """ - Verifies the active branch conforms do the correct directory structure + Verifies the active branch conforms to the correct directory structure """ dir_names = os.listdir(self.local_repo_dir) if "modules" not in dir_names: