diff --git a/nf_core/refgenie.py b/nf_core/refgenie.py
new file mode 100644
index 0000000000..d9275b6cfd
--- /dev/null
+++ b/nf_core/refgenie.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+"""
+Update a nextflow.config file with refgenie genomes
+"""
+
+import logging
+import os
+import re
+from pathlib import Path
+from textwrap import dedent
+
+# import refgenconf
+from warnings import warn
+
+import rich
+from rich.logging import RichHandler
+
+import nf_core.utils
+
+# Set up logging
+log = logging.getLogger(__name__)
+log.setLevel(logging.INFO)
+
+# # Setup rich traceback
+stderr = rich.console.Console(stderr=True, force_terminal=nf_core.utils.rich_force_colors())
+rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)
+
+NF_CFG_TEMPLATE = """
+// This is a read-only config file managed by refgenie. Manual changes to this file will be overwritten
+// To make changes here, use refgenie to update the reference genome data
+params {{
+  genomes {{
+{content}
+  }}
+}}
+"""
+
+
+def _print_nf_config(rgc):
+    """
+    Generate a nextflow config file with the genomes
+    from the refgenie config file
+    Adapted from: https://github.com/refgenie/refgenie_nfcore
+
+    Takes a RefGenConf object as argument and returns the rendered config as a string.
+    Assets whose path cannot be resolved are skipped with a warning.
+    """
+    abg = rgc.list_assets_by_genome()
+    genomes_str = ""
+    for genome, asset_list in abg.items():
+        genomes_str += f"    '{genome}' {{\n"
+        for asset in asset_list:
+            try:
+                pth = rgc.seek(genome, asset)
+            # Catch general exception instead of refgenconf exception --> no refgenconf import needed
+            except Exception:
+                log.warning(f"{genome}/{asset} is incomplete, ignoring...")
+            else:
+                genomes_str += f'      {asset.ljust(20, " ")} = "{pth}"\n'
+        genomes_str += "    }\n"
+
+    return NF_CFG_TEMPLATE.format(content=genomes_str)
+
+
+def _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home):
+    """
+    Update the $NXF_HOME/config file by adding a includeConfig statement to it
+    for the 'refgenie_genomes_config_file' if not already defined
+    """
+    include_path = os.path.abspath(refgenie_genomes_config_file)
+    # Block that pulls the refgenie genomes config into NXF_HOME/config
+    include_config_string = dedent(
+        f"""
+        ///// >>> nf-core + RefGenie >>> /////
+        // !! Contents within this block are managed by 'nf-core/tools' !!
+        // Includes auto-generated config file with RefGenie genome assets
+        includeConfig '{include_path}'
+        ///// <<< nf-core + RefGenie <<< /////
+        """
+    )
+    nxf_home_config = Path(nxf_home) / "config"
+    if os.path.exists(nxf_home_config):
+        # look for include statement in config
+        # raw string + re.escape: the path is matched literally, not as a regex
+        include_pattern = re.compile(rf"\s*includeConfig\s*'{re.escape(include_path)}'")
+        with open(nxf_home_config, "r") as fh:
+            has_include_statement = any(include_pattern.match(line) for line in fh)
+
+        # if include statement is missing, add it to the last line
+        if not has_include_statement:
+            with open(nxf_home_config, "a") as fh:
+                fh.write(include_config_string)
+
+            log.info(f"Included refgenie_genomes.config to {nxf_home_config}")
+
+    else:
+        # create new config and add include statement
+        with open(nxf_home_config, "w") as fh:
+            fh.write(include_config_string)
+        log.info(f"Created new nextflow config file: {nxf_home_config}")
+
+
+def update_config(rgc):
+    """
+    Update the genomes.config file after a local refgenie database has been updated
+
+    This function is executed after running 'refgenie pull <genome>/<asset>'
+    The refgenie config file is transformed into a nextflow.config file, which is used to
+    overwrite the 'refgenie_genomes.config' file.
+    The path to the target config file is inferred from the following options, in order:
+
+    - the 'nextflow_config' attribute in the refgenie config file
+    - the NXF_REFGENIE_PATH environment variable
+    - otherwise defaults to: $NXF_HOME/nf-core/refgenie_genomes.config
+
+    Additionally, an 'includeConfig' statement is added to the file $NXF_HOME/config
+
+    Returns True if the genomes config was written, False otherwise.
+    """
+
+    # Compile nextflow refgenie_genomes.config from refgenie config
+    refgenie_genomes = _print_nf_config(rgc)
+
+    # Get the path to NXF_HOME
+    # If NXF_HOME is not set, create it at $HOME/.nextflow
+    # If $HOME is not set, set nxf_home to false
+    nxf_home = os.environ.get("NXF_HOME")
+    if not nxf_home:
+        try:
+            nxf_home = Path.home() / ".nextflow"
+            if not os.path.exists(nxf_home):
+                log.info(f"Creating NXF_HOME directory at {nxf_home}")
+                os.makedirs(nxf_home, exist_ok=True)
+        except RuntimeError:
+            nxf_home = False
+
+    # Get the path for storing the updated refgenie_genomes.config
+    if hasattr(rgc, "nextflow_config"):
+        refgenie_genomes_config_file = rgc.nextflow_config
+    elif "NXF_REFGENIE_PATH" in os.environ:
+        refgenie_genomes_config_file = os.environ.get("NXF_REFGENIE_PATH")
+    elif nxf_home:
+        refgenie_genomes_config_file = Path(nxf_home) / "nf-core/refgenie_genomes.config"
+    else:
+        log.info("Could not determine path to 'refgenie_genomes.config' file.")
+        return False
+
+    # Save the updated genome config
+    try:
+        with open(refgenie_genomes_config_file, "w") as fh:
+            fh.write(refgenie_genomes)
+        log.info(f"Updated nf-core genomes config: {refgenie_genomes_config_file}")
+    except FileNotFoundError:
+        log.warning(f"Could not write to {refgenie_genomes_config_file}")
+        return False
+
+    # Add include statement to NXF_HOME/config
+    if nxf_home:
+        _update_nextflow_home_config(refgenie_genomes_config_file, nxf_home)
+
+    return True
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4c97a2313b..37094ce491 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -2,6 +2,7 @@ pytest-datafiles
 pytest-cov
 mock
 black
+refgenie
 Sphinx
 sphinx_rtd_theme
 isort
diff --git a/setup.py b/setup.py
index 4fdfb7c5a8..f24b87001e 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,10 @@
     author_email="phil.ewels@scilifelab.se",
     url="https://github.com/nf-core/tools",
     license="MIT",
-    entry_points={"console_scripts": ["nf-core=nf_core.__main__:run_nf_core"]},
+    entry_points={
+        "console_scripts": ["nf-core=nf_core.__main__:run_nf_core"],
+        "refgenie.hooks.post_update": ["nf-core-refgenie=nf_core.refgenie:update_config"],
+    },
     install_requires=required,
     packages=find_packages(exclude=("docs")),
     include_package_data=True,
diff --git a/tests/test_refgenie.py b/tests/test_refgenie.py
new file mode 100644
index 0000000000..f406cd362e
--- /dev/null
+++ b/tests/test_refgenie.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+""" Tests covering the refgenie integration code
+"""
+
+import os
+import shlex
+import shutil
+import subprocess
+import tempfile
+import unittest
+
+import nf_core.refgenie
+
+
+class TestRefgenie(unittest.TestCase):
+    """Class for refgenie tests"""
+
+    def setUp(self):
+        """
+        Prepare a refgenie config file
+        """
+        self.tmp_dir = tempfile.mkdtemp()
+        self.NXF_HOME = os.path.join(self.tmp_dir, ".nextflow")
+        self.NXF_REFGENIE_PATH = os.path.join(self.NXF_HOME, "nf-core", "refgenie_genomes.config")
+        self.REFGENIE = os.path.join(self.tmp_dir, "genomes_config.yaml")
+        # Set NXF_HOME environment variable
+        # avoids adding includeConfig statement to config file outside the current tmpdir
+        self.NXF_HOME_ORIGINAL = os.environ.get("NXF_HOME")
+        os.environ["NXF_HOME"] = self.NXF_HOME
+
+        # create NXF_HOME and nf-core directories
+        os.makedirs(os.path.join(self.NXF_HOME, "nf-core"), exist_ok=True)
+
+        # Initialize a refgenie config; fail early if refgenie cannot be run
+        subprocess.run(["refgenie", "init", "-c", self.REFGENIE], check=True)
+
+        # Add NXF_REFGENIE_PATH to refgenie config
+        with open(self.REFGENIE, "a") as fh:
+            fh.write(f"nextflow_config: {self.NXF_REFGENIE_PATH}\n")
+
+    def tearDown(self) -> None:
+        """Remove the temporary directory and restore the NXF_HOME environment variable"""
+        shutil.rmtree(self.tmp_dir, ignore_errors=True)
+        if self.NXF_HOME_ORIGINAL is None:
+            os.environ.pop("NXF_HOME", None)
+        else:
+            os.environ["NXF_HOME"] = self.NXF_HOME_ORIGINAL
+
+    def test_update_refgenie_genomes_config(self):
+        """Test that pulling an asset updates the nf-core genomes config"""
+        # Populate the config with a genome
+        cmd = f"refgenie pull t7/fasta -c {self.REFGENIE}"
+        out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT)
+
+        assert "Updated nf-core genomes config" in str(out)