From e7249b70024e442a49125b135bcb1ad552a40103 Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 16 Jan 2024 09:46:48 +0100 Subject: [PATCH 01/65] second attempt to add ro crates --- nf_core/ro_crate.py | 46 +++++++++++++++++++++++++++++++++++++++++++++ nf_core/utils.py | 19 +++++++++---------- 2 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 nf_core/ro_crate.py diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py new file mode 100644 index 0000000000..5e205fa056 --- /dev/null +++ b/nf_core/ro_crate.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +""" Code to deal with pipeline RO (Research Object) Crates """ + + +import logging +import tempfile +from pathlib import Path + +import rocrate.model.entity +import rocrate.rocrate +from typing import Union + +log = logging.getLogger(__name__) + + +class RoCrate: + """Class to generate an RO Crate for a pipeline""" + + def __init__(self, pipeline_dir: Union[str, Path], version=""): + self.pipeline_dir = pipeline_dir + self.version = version + + def create_ro_create(self, outdir: Path, metadata_fn="", zip_fn=""): + """Create an RO Crate for the pipeline""" + + # Create a temporary directory for the RO Crate + rocrate_dir = tempfile.mkdtemp(prefix="nf-core-ro-crate-") + + # Create the RO Crate + wf_crate = rocrate.rocrate.ROCrate(rocrate_dir) + + # Set main entity file + wf_file = wf_crate.add_file(Path(self.pipeline_dir, "nextflow.config"), "nextflow.config") + wf_crate.mainEntity = wf_file + + # Set language type + programming_language = rocrate.model.entity.Entity( + wf_crate, + "https://www.nextflow.io/", + properties={ + "@type": ["ComputerLanguage", "SoftwareApplication"], + "name": "Nextflow", + "url": "https://www.nextflow.io/", + }, + ) + wf_crate.add(programming_language) diff --git a/nf_core/utils.py b/nf_core/utils.py index 10b21018d2..d2b5fa9a5c 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -215,7 +215,7 @@ def is_pipeline_directory(wf_path): raise UserWarning(f"'{wf_path}' is not a pipeline - '{fn}' is missing") -def fetch_wf_config(wf_path, cache_config=True): +def fetch_wf_config(wf_path: Union[Path, str], cache_config=True) -> dict: """Uses Nextflow to retrieve the the configuration variables from a Nextflow workflow. 
@@ -228,20 +228,19 @@ def fetch_wf_config(wf_path, cache_config=True): """ log.debug(f"Got '{wf_path}' as path") - + wf_path = Path(wf_path) config = {} cache_fn = None cache_basedir = None cache_path = None # Nextflow home directory - use env var if set, or default to ~/.nextflow - nxf_home = os.environ.get("NXF_HOME", os.path.join(os.getenv("HOME"), ".nextflow")) + nxf_home = Path(os.environ.get("NXF_HOME", Path(os.getenv("HOME", ""), ".nextflow"))) # Build a cache directory if we can - if os.path.isdir(nxf_home): - cache_basedir = os.path.join(nxf_home, "nf-core") - if not os.path.isdir(cache_basedir): - os.mkdir(cache_basedir) + if (nxf_home).is_dir(): + cache_basedir = nxf_home / "nf-core" + cache_basedir.mkdir(parents=True, exist_ok=True) # If we're given a workflow object with a commit, see if we have a cached copy cache_fn = None @@ -249,7 +248,7 @@ def fetch_wf_config(wf_path, cache_config=True): concat_hash = "" for fn in ["nextflow.config", "main.nf"]: try: - with open(os.path.join(wf_path, fn), "rb") as fh: + with open(Path(wf_path, fn), "rb") as fh: concat_hash += hashlib.sha256(fh.read()).hexdigest() except FileNotFoundError: pass @@ -259,8 +258,8 @@ def fetch_wf_config(wf_path, cache_config=True): cache_fn = f"wf-config-cache-{bighash[:25]}.json" if cache_basedir and cache_fn: - cache_path = os.path.join(cache_basedir, cache_fn) - if os.path.isfile(cache_path) and cache_config is True: + cache_path = Path(cache_basedir, cache_fn) + if cache_path.is_file() and cache_config is True: log.debug(f"Found a config cache, loading: {cache_path}") with open(cache_path) as fh: try: From a597c8c6a64fcd8595f3e5811bb846852622762d Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 16 Jan 2024 09:49:01 +0100 Subject: [PATCH 02/65] fix import --- nf_core/ro_crate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index 5e205fa056..abdb926e6f 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -5,10 +5,10 @@ import logging import tempfile from pathlib import Path +from typing import Union import rocrate.model.entity import rocrate.rocrate -from typing import Union log = logging.getLogger(__name__) From 7a66e067b0d7a4635aac03a59f101825cee8ad8f Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 23 Jan 2024 18:21:53 +0100 Subject: [PATCH 03/65] first running version --- nf_core/__main__.py | 577 ++++++++++++++++++++++++++++++++++++++------ nf_core/ro_crate.py | 173 ++++++++++++- nf_core/schema.py | 6 +- nf_core/utils.py | 98 ++++++-- 4 files changed, 755 insertions(+), 99 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index e02464b922..6dadadf22b 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -3,6 +3,7 @@ import logging import os import sys +from pathlib import Path import rich import rich.console @@ -31,11 +32,27 @@ "nf-core": [ { "name": "Commands for users", - "commands": ["list", "launch", "create-params-file", "download", "licences", "tui"], + "commands": [ + "list", + "launch", + "create-params-file", + "download", + "licences", + "tui", + ], }, { "name": "Commands for developers", - "commands": ["create", "lint", "modules", "subworkflows", "schema", "bump-version", "sync"], + "commands": [ + "create", + "lint", + "modules", + "subworkflows", + "schema", + "bump-version", + "sync", + "rocrate", + ], }, ], "nf-core modules": [ @@ -90,10 +107,22 @@ def run_nf_core(): if os.environ.get("_NF_CORE_COMPLETE") is None: # Print nf-core header stderr.print(f"\n[green]{' ' * 
42},--.[grey39]/[green],-.", highlight=False) - stderr.print("[blue] ___ __ __ __ ___ [green]/,-._.--~\\", highlight=False) - stderr.print(r"[blue] |\ | |__ __ / ` / \ |__) |__ [yellow] } {", highlight=False) - stderr.print(r"[blue] | \| | \__, \__/ | \ |___ [green]\`-._,-`-,", highlight=False) - stderr.print("[green] `._,._,'\n", highlight=False) + stderr.print( + "[blue] ___ __ __ __ ___ [green]/,-._.--~\\", + highlight=False, + ) + stderr.print( + r"[blue] |\ | |__ __ / ` / \ |__) |__ [yellow] } {", + highlight=False, + ) + stderr.print( + r"[blue] | \| | \__, \__/ | \ |___ [green]\`-._,-`-,", + highlight=False, + ) + stderr.print( + "[green] `._,._,'\n", + highlight=False, + ) stderr.print( f"[grey39] nf-core/tools version {__version__} - [link=https://nf-co.re]https://nf-co.re[/]", highlight=False, @@ -115,7 +144,13 @@ def run_nf_core(): @tui() @click.group(context_settings=dict(help_option_names=["-h", "--help"])) @click.version_option(__version__) -@click.option("-v", "--verbose", is_flag=True, default=False, help="Print verbose output to the console.") +@click.option( + "-v", + "--verbose", + is_flag=True, + default=False, + help="Print verbose output to the console.", +) @click.option("--hide-progress", is_flag=True, default=False, help="Don't show progress bars.") @click.option("-l", "--log-file", help="Save a verbose log to a file.", metavar="") @click.pass_context @@ -182,7 +217,11 @@ def list_pipelines(keywords, sort, json, show_archived): @click.option("-r", "--revision", help="Release/branch/SHA of the project to run (if remote)") @click.option("-i", "--id", help="ID for web-gui launch parameter set") @click.option( - "-c", "--command-only", is_flag=True, default=False, help="Create Nextflow command with params (no params file)" + "-c", + "--command-only", + is_flag=True, + default=False, + help="Create Nextflow command with params (no params file)", ) @click.option( "-o", @@ -192,18 +231,43 @@ def list_pipelines(keywords, sort, json, show_archived): help="Path to save run parameters file", ) @click.option( - "-p", "--params-in", type=click.Path(exists=True), help="Set of input run params to use from a previous run" + "-p", + "--params-in", + type=click.Path(exists=True), + help="Set of input run params to use from a previous run", ) @click.option( - "-a", "--save-all", is_flag=True, default=False, help="Save all parameters, even if unchanged from default" + "-a", + "--save-all", + is_flag=True, + default=False, + help="Save all parameters, even if unchanged from default", ) @click.option( - "-x", "--show-hidden", is_flag=True, default=False, help="Show hidden params which don't normally need changing" + "-x", + "--show-hidden", + is_flag=True, + default=False, + help="Show hidden params which don't normally need changing", ) @click.option( - "-u", "--url", type=str, default="https://nf-co.re/launch", help="Customise the builder URL (for development work)" + "-u", + "--url", + type=str, + default="https://nf-co.re/launch", + help="Customise the builder URL (for development work)", ) -def launch(pipeline, id, revision, command_only, params_in, params_out, save_all, show_hidden, url): +def launch( + pipeline, + id, + revision, + command_only, + params_in, + params_out, + save_all, + show_hidden, + url, +): """ Launch a pipeline using a web GUI or command line prompts. 
@@ -219,7 +283,17 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all """ from nf_core.launch import Launch - launcher = Launch(pipeline, revision, command_only, params_in, params_out, save_all, show_hidden, url, id) + launcher = Launch( + pipeline, + revision, + command_only, + params_in, + params_out, + save_all, + show_hidden, + url, + id, + ) if not launcher.launch_pipeline(): sys.exit(1) @@ -238,7 +312,11 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") @click.option( - "-x", "--show-hidden", is_flag=True, default=False, help="Show hidden params which don't normally need changing" + "-x", + "--show-hidden", + is_flag=True, + default=False, + help="Show hidden params which don't normally need changing", ) def create_params_file(pipeline, revision, output, force, show_hidden): """ @@ -269,10 +347,19 @@ def create_params_file(pipeline, revision, output, force, show_hidden): ) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( - "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" + "-x", + "--compress", + type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), + help="Archive compression type", ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") -@click.option("-t", "--tower", is_flag=True, default=False, help="Download for seqeralabs® Nextflow Tower") +@click.option( + "-t", + "--tower", + is_flag=True, + default=False, + help="Download for seqeralabs® Nextflow Tower", +) @click.option( "-d", "--download-configuration", @@ -306,7 +393,13 @@ def create_params_file(pipeline, revision, output, force, show_hidden): type=str, help="List of images already available in a remote `singularity.cacheDir`.", ) -@click.option("-p", "--parallel-downloads", type=int, default=4, help="Number of parallel image downloads") +@click.option( + "-p", + "--parallel-downloads", + type=int, + default=4, + help="Number of parallel image downloads", +) def download( pipeline, revision, @@ -380,7 +473,13 @@ def licences(pipeline, json): @click.option("-d", "--description", type=str, help="A short description of your pipeline") @click.option("-a", "--author", type=str, help="Name of the main author(s)") @click.option("--version", type=str, default="1.0dev", help="The initial version number to use") -@click.option("-f", "--force", is_flag=True, default=False, help="Overwrite output directory if it already exists") +@click.option( + "-f", + "--force", + is_flag=True, + default=False, + help="Overwrite output directory if it already exists", +) @click.option("-o", "--outdir", help="Output directory for new pipeline (default: pipeline name)") @click.option("-t", "--template-yaml", help="Pass a YAML file to customize the template") @click.option("--plain", is_flag=True, help="Use the standard nf-core template") @@ -428,14 +527,36 @@ def create(name, description, author, version, force, outdir, template_yaml, pla help="Execute additional checks for release-ready workflows.", ) @click.option( - "-f", "--fix", type=str, metavar="", multiple=True, help="Attempt to automatically fix specified lint test" + "-f", + "--fix", + type=str, + metavar="", + multiple=True, + help="Attempt to automatically fix specified lint test", +) +@click.option( + "-k", + "--key", + type=str, + metavar="", + multiple=True, + help="Run only these lint tests", 
) -@click.option("-k", "--key", type=str, metavar="", multiple=True, help="Run only these lint tests") @click.option("-p", "--show-passed", is_flag=True, help="Show passing tests on the command line") @click.option("-i", "--fail-ignored", is_flag=True, help="Convert ignored tests to failures") @click.option("-w", "--fail-warned", is_flag=True, help="Convert warn tests to failures") -@click.option("--markdown", type=str, metavar="", help="File to write linting results to (Markdown)") -@click.option("--json", type=str, metavar="", help="File to write linting results to (JSON)") +@click.option( + "--markdown", + type=str, + metavar="", + help="File to write linting results to (Markdown)", +) +@click.option( + "--json", + type=str, + metavar="", + help="File to write linting results to (JSON)", +) @click.option( "--sort-by", type=click.Choice(["module", "test"]), @@ -444,7 +565,19 @@ def create(name, description, author, version, force, outdir, template_yaml, pla show_default=True, ) @click.pass_context -def lint(ctx, dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdown, json, sort_by): +def lint( + ctx, + dir, + release, + fix, + key, + show_passed, + fail_ignored, + fail_warned, + markdown, + json, + sort_by, +): """ Check pipeline code against nf-core guidelines. @@ -499,7 +632,13 @@ def lint(ctx, dir, release, fix, key, show_passed, fail_ignored, fail_warned, ma default=NF_CORE_MODULES_REMOTE, help="Remote git repo to fetch files from", ) -@click.option("-b", "--branch", type=str, default=None, help="Branch of git repository hosting modules.") +@click.option( + "-b", + "--branch", + type=str, + default=None, + help="Branch of git repository hosting modules.", +) @click.option( "-N", "--no-pull", @@ -531,7 +670,13 @@ def modules(ctx, git_remote, branch, no_pull): default=NF_CORE_MODULES_REMOTE, help="Remote git repo to fetch files from", ) -@click.option("-b", "--branch", type=str, default=None, help="Branch of git repository hosting modules.") +@click.option( + "-b", + "--branch", + type=str, + default=None, + help="Branch of git repository hosting modules.", +) @click.option( "-N", "--no-pull", @@ -632,8 +777,20 @@ def modules_list_local(ctx, keywords, json, dir): # pylint: disable=redefined-b default=".", help=r"Pipeline directory. [dim]\[default: current working directory][/]", ) -@click.option("-p", "--prompt", is_flag=True, default=False, help="Prompt for the version of the module") -@click.option("-f", "--force", is_flag=True, default=False, help="Force reinstallation of module if it already exists") +@click.option( + "-p", + "--prompt", + is_flag=True, + default=False, + help="Prompt for the version of the module", +) +@click.option( + "-f", + "--force", + is_flag=True, + default=False, + help="Force reinstallation of module if it already exists", +) @click.option("-s", "--sha", type=str, metavar="", help="Install module at commit SHA") def modules_install(ctx, tool, dir, prompt, force, sha): """ @@ -674,10 +831,21 @@ def modules_install(ctx, tool, dir, prompt, force, sha): help=r"Pipeline directory. 
[dim]\[default: current working directory][/]", ) @click.option("-f", "--force", is_flag=True, default=False, help="Force update of module") -@click.option("-p", "--prompt", is_flag=True, default=False, help="Prompt for the version of the module") +@click.option( + "-p", + "--prompt", + is_flag=True, + default=False, + help="Prompt for the version of the module", +) @click.option("-s", "--sha", type=str, metavar="", help="Install module at commit SHA") @click.option( - "-a", "--all", "install_all", is_flag=True, default=False, help="Update all modules installed in pipeline" + "-a", + "--all", + "install_all", + is_flag=True, + default=False, + help="Update all modules installed in pipeline", ) @click.option( "-x/-y", @@ -701,7 +869,18 @@ def modules_install(ctx, tool, dir, prompt, force, sha): default=False, help="Automatically update all linked modules and subworkflows without asking for confirmation", ) -def modules_update(ctx, tool, directory, force, prompt, sha, install_all, preview, save_diff, update_deps): +def modules_update( + ctx, + tool, + directory, + force, + prompt, + sha, + install_all, + preview, + save_diff, + update_deps, +): """ Update DSL2 modules within a pipeline. @@ -803,13 +982,55 @@ def modules_remove(ctx, dir, tool): @click.pass_context @click.argument("tool", type=str, required=False, metavar=" or ") @click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") -@click.option("-a", "--author", type=str, metavar="", help="Module author's GitHub username prefixed with '@'") -@click.option("-l", "--label", type=str, metavar="", help="Standard resource label for process") -@click.option("-m", "--meta", is_flag=True, default=False, help="Use Groovy meta map for sample information") -@click.option("-n", "--no-meta", is_flag=True, default=False, help="Don't use meta map for sample information") -@click.option("-f", "--force", is_flag=True, default=False, help="Overwrite any files if they already exist") -@click.option("-c", "--conda-name", type=str, default=None, help="Name of the conda package to use") -@click.option("-p", "--conda-package-version", type=str, default=None, help="Version of conda package to use") +@click.option( + "-a", + "--author", + type=str, + metavar="", + help="Module author's GitHub username prefixed with '@'", +) +@click.option( + "-l", + "--label", + type=str, + metavar="", + help="Standard resource label for process", +) +@click.option( + "-m", + "--meta", + is_flag=True, + default=False, + help="Use Groovy meta map for sample information", +) +@click.option( + "-n", + "--no-meta", + is_flag=True, + default=False, + help="Don't use meta map for sample information", +) +@click.option( + "-f", + "--force", + is_flag=True, + default=False, + help="Overwrite any files if they already exist", +) +@click.option( + "-c", + "--conda-name", + type=str, + default=None, + help="Name of the conda package to use", +) +@click.option( + "-p", + "--conda-package-version", + type=str, + default=None, + help="Version of conda package to use", +) @click.option( "-i", "--empty-template", @@ -817,7 +1038,12 @@ def modules_remove(ctx, dir, tool): default=False, help="Create a module from the template without TODOs or examples", ) -@click.option("--migrate-pytest", is_flag=True, default=False, help="Migrate a module with pytest tests to nf-test") +@click.option( + "--migrate-pytest", + is_flag=True, + default=False, + help="Migrate a module with pytest tests to nf-test", +) def create_module( ctx, tool, @@ -855,7 +1081,16 @@ def create_module( # 
Run function try: module_create = ModuleCreate( - dir, tool, author, label, has_meta, force, conda_name, conda_package_version, empty_template, migrate_pytest + dir, + tool, + author, + label, + has_meta, + force, + conda_name, + conda_package_version, + empty_template, + migrate_pytest, ) module_create.create() except UserWarning as e: @@ -870,10 +1105,28 @@ def create_module( @modules.command("test") @click.pass_context @click.argument("tool", type=str, required=False, metavar=" or ") -@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") -@click.option("-p", "--no-prompts", is_flag=True, default=False, help="Use defaults without prompting") +@click.option( + "-d", + "--dir", + type=click.Path(exists=True), + default=".", + metavar="", +) +@click.option( + "-p", + "--no-prompts", + is_flag=True, + default=False, + help="Use defaults without prompting", +) @click.option("-u", "--update", is_flag=True, default=False, help="Update existing snapshots") -@click.option("-o", "--once", is_flag=True, default=False, help="Run tests only once. Don't check snapshot stability") +@click.option( + "-o", + "--once", + is_flag=True, + default=False, + help="Run tests only once. Don't check snapshot stability", +) def test_module(ctx, tool, dir, no_prompts, update, once): """ Run nf-test for a module. @@ -904,7 +1157,13 @@ def test_module(ctx, tool, dir, no_prompts, update, once): @modules.command("lint") @click.pass_context @click.argument("tool", type=str, required=False, metavar=" or ") -@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") +@click.option( + "-d", + "--dir", + type=click.Path(exists=True), + default=".", + metavar="", +) @click.option( "-r", "--registry", @@ -913,7 +1172,14 @@ def test_module(ctx, tool, dir, no_prompts, update, once): default=None, help="Registry to use for containers. If not specified it will use docker.registry value in the nextflow.config file", ) -@click.option("-k", "--key", type=str, metavar="", multiple=True, help="Run only these lint tests") +@click.option( + "-k", + "--key", + type=str, + metavar="", + multiple=True, + help="Run only these lint tests", +) @click.option("-a", "--all", is_flag=True, help="Run on all modules") @click.option("-w", "--fail-warned", is_flag=True, help="Convert warn tests to failures") @click.option("--local", is_flag=True, help="Run additional lint tests for local modules") @@ -925,7 +1191,11 @@ def test_module(ctx, tool, dir, no_prompts, update, once): help="Sort lint output by module or test name.", show_default=True, ) -@click.option("--fix-version", is_flag=True, help="Fix the module version if a newer version is available") +@click.option( + "--fix-version", + is_flag=True, + help="Fix the module version if a newer version is available", +) def modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version): """ Lint one or more modules in a directory. 
@@ -1013,7 +1283,13 @@ def modules_info(ctx, tool, dir): @modules.command() @click.pass_context @click.argument("tool", type=str, required=False, metavar=" or ") -@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") +@click.option( + "-d", + "--dir", + type=click.Path(exists=True), + default=".", + metavar="", +) @click.option("-a", "--all", is_flag=True, help="Run on all modules") @click.option("-s", "--show-all", is_flag=True, help="Show up-to-date modules in results too") def bump_versions(ctx, tool, dir, all, show_all): @@ -1045,9 +1321,26 @@ def bump_versions(ctx, tool, dir, all, show_all): @click.pass_context @click.argument("subworkflow", type=str, required=False, metavar="subworkflow name") @click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") -@click.option("-a", "--author", type=str, metavar="", help="Module author's GitHub username prefixed with '@'") -@click.option("-f", "--force", is_flag=True, default=False, help="Overwrite any files if they already exist") -@click.option("--migrate-pytest", is_flag=True, default=False, help="Migrate a module with pytest tests to nf-test") +@click.option( + "-a", + "--author", + type=str, + metavar="", + help="Module author's GitHub username prefixed with '@'", +) +@click.option( + "-f", + "--force", + is_flag=True, + default=False, + help="Overwrite any files if they already exist", +) +@click.option( + "--migrate-pytest", + is_flag=True, + default=False, + help="Migrate a module with pytest tests to nf-test", +) def create_subworkflow(ctx, subworkflow, dir, author, force, migrate_pytest): """ Create a new subworkflow from the nf-core template. @@ -1076,10 +1369,28 @@ def create_subworkflow(ctx, subworkflow, dir, author, force, migrate_pytest): @subworkflows.command("test") @click.pass_context @click.argument("subworkflow", type=str, required=False, metavar="subworkflow name") -@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") -@click.option("-p", "--no-prompts", is_flag=True, default=False, help="Use defaults without prompting") +@click.option( + "-d", + "--dir", + type=click.Path(exists=True), + default=".", + metavar="", +) +@click.option( + "-p", + "--no-prompts", + is_flag=True, + default=False, + help="Use defaults without prompting", +) @click.option("-u", "--update", is_flag=True, default=False, help="Update existing snapshots") -@click.option("-o", "--once", is_flag=True, default=False, help="Run tests only once. Don't check snapshot stability") +@click.option( + "-o", + "--once", + is_flag=True, + default=False, + help="Run tests only once. Don't check snapshot stability", +) def test_subworkflow(ctx, subworkflow, dir, no_prompts, update, once): """ Run nf-test for a subworkflow. @@ -1178,7 +1489,13 @@ def subworkflows_list_local(ctx, keywords, json, dir): # pylint: disable=redefi @subworkflows.command("lint") @click.pass_context @click.argument("subworkflow", type=str, required=False, metavar="subworkflow name") -@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") +@click.option( + "-d", + "--dir", + type=click.Path(exists=True), + default=".", + metavar="", +) @click.option( "-r", "--registry", @@ -1187,7 +1504,14 @@ def subworkflows_list_local(ctx, keywords, json, dir): # pylint: disable=redefi default=None, help="Registry to use for containers. 
If not specified it will use docker.registry value in the nextflow.config file", ) -@click.option("-k", "--key", type=str, metavar="", multiple=True, help="Run only these lint tests") +@click.option( + "-k", + "--key", + type=str, + metavar="", + multiple=True, + help="Run only these lint tests", +) @click.option("-a", "--all", is_flag=True, help="Run on all subworkflows") @click.option("-w", "--fail-warned", is_flag=True, help="Convert warn tests to failures") @click.option("--local", is_flag=True, help="Run additional lint tests for local subworkflows") @@ -1292,11 +1616,27 @@ def subworkflows_info(ctx, tool, dir): default=".", help=r"Pipeline directory. [dim]\[default: current working directory][/]", ) -@click.option("-p", "--prompt", is_flag=True, default=False, help="Prompt for the version of the subworkflow") @click.option( - "-f", "--force", is_flag=True, default=False, help="Force reinstallation of subworkflow if it already exists" + "-p", + "--prompt", + is_flag=True, + default=False, + help="Prompt for the version of the subworkflow", +) +@click.option( + "-f", + "--force", + is_flag=True, + default=False, + help="Force reinstallation of subworkflow if it already exists", +) +@click.option( + "-s", + "--sha", + type=str, + metavar="", + help="Install subworkflow at commit SHA", ) -@click.option("-s", "--sha", type=str, metavar="", help="Install subworkflow at commit SHA") def subworkflows_install(ctx, subworkflow, dir, prompt, force, sha): """ Install DSL2 subworkflow within a pipeline. @@ -1365,10 +1705,27 @@ def subworkflows_remove(ctx, dir, subworkflow): help=r"Pipeline directory. [dim]\[default: current working directory][/]", ) @click.option("-f", "--force", is_flag=True, default=False, help="Force update of subworkflow") -@click.option("-p", "--prompt", is_flag=True, default=False, help="Prompt for the version of the subworkflow") -@click.option("-s", "--sha", type=str, metavar="", help="Install subworkflow at commit SHA") @click.option( - "-a", "--all", "install_all", is_flag=True, default=False, help="Update all subworkflow installed in pipeline" + "-p", + "--prompt", + is_flag=True, + default=False, + help="Prompt for the version of the subworkflow", +) +@click.option( + "-s", + "--sha", + type=str, + metavar="", + help="Install subworkflow at commit SHA", +) +@click.option( + "-a", + "--all", + "install_all", + is_flag=True, + default=False, + help="Update all subworkflow installed in pipeline", ) @click.option( "-x/-y", @@ -1392,7 +1749,18 @@ def subworkflows_remove(ctx, dir, subworkflow): default=False, help="Automatically update all linked modules and subworkflows without asking for confirmation", ) -def subworkflows_update(ctx, subworkflow, dir, force, prompt, sha, install_all, preview, save_diff, update_deps): +def subworkflows_update( + ctx, + subworkflow, + dir, + force, + prompt, + sha, + install_all, + preview, + save_diff, + update_deps, +): """ Update DSL2 subworkflow within a pipeline. @@ -1474,12 +1842,20 @@ def validate(pipeline, params): default=".", help=r"Pipeline directory. 
[dim]\[default: current working directory][/]", ) -@click.option("--no-prompts", is_flag=True, help="Do not confirm changes, just update parameters and exit") -@click.option("--web-only", is_flag=True, help="Skip building using Nextflow config, just launch the web tool") +@click.option( + "--no-prompts", + is_flag=True, + help="Do not confirm changes, just update parameters and exit", +) +@click.option( + "--web-only", + is_flag=True, + help="Skip building using Nextflow config, just launch the web tool", +) @click.option( "--url", type=str, - default="https://nf-co.re/pipeline_schema_builder", + default="https://oldsite.nf-co.re/pipeline_schema_builder", help="Customise the builder URL (for development work)", ) def build(dir, no_prompts, web_only, url): @@ -1508,7 +1884,10 @@ def build(dir, no_prompts, web_only, url): # nf-core schema lint @schema.command("lint") @click.argument( - "schema_path", type=click.Path(exists=True), default="nextflow_schema.json", metavar="" + "schema_path", + type=click.Path(exists=True), + default="nextflow_schema.json", + metavar="", ) def schema_lint(schema_path): """ @@ -1545,9 +1924,19 @@ def schema_lint(schema_path): required=False, metavar="", ) -@click.option("-o", "--output", type=str, metavar="", help="Output filename. Defaults to standard out.") @click.option( - "-x", "--format", type=click.Choice(["markdown", "html"]), default="markdown", help="Format to output docs in." + "-o", + "--output", + type=str, + metavar="", + help="Output filename. Defaults to standard out.", +) +@click.option( + "-x", + "--format", + type=click.Choice(["markdown", "html"]), + default="markdown", + help="Format to output docs in.", ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") @click.option( @@ -1586,7 +1975,11 @@ def docs(schema_path, output, format, force, columns): help=r"Pipeline directory. [dim]\[default: current working directory][/]", ) @click.option( - "-n", "--nextflow", is_flag=True, default=False, help="Bump required nextflow version instead of pipeline version" + "-n", + "--nextflow", + is_flag=True, + default=False, + help="Bump required nextflow version instead of pipeline version", ) def bump_version(new_version, dir, nextflow): """ @@ -1631,8 +2024,19 @@ def bump_version(new_version, dir, nextflow): default=".", help=r"Pipeline directory. 
[dim]\[default: current working directory][/]", ) -@click.option("-b", "--from-branch", type=str, help="The git branch to use to fetch workflow variables.") -@click.option("-p", "--pull-request", is_flag=True, default=False, help="Make a GitHub pull-request with the changes.") +@click.option( + "-b", + "--from-branch", + type=str, + help="The git branch to use to fetch workflow variables.", +) +@click.option( + "-p", + "--pull-request", + is_flag=True, + default=False, + help="Make a GitHub pull-request with the changes.", +) @click.option("-g", "--github-repository", type=str, help="GitHub PR: target repository.") @click.option("-u", "--username", type=str, help="GitHub PR: auth username.") @click.option("-t", "--template-yaml", help="Pass a YAML file to customize the template") @@ -1664,6 +2068,37 @@ def sync(dir, from_branch, pull_request, github_repository, username, template_y sys.exit(1) +# nf-core rocrate +@nf_core_cli.command("rocrate") +@click.argument( + "pipeline_dir", + type=click.Path(exists=True), + default=Path.cwd(), + required=True, + metavar="", +) +@click.option( + "-j", + "--json", + default="ro-crate-metadata.json", + type=str, + help="Path to save RO Crate metadata json", +) +@click.option("-z", "--zip", type=str, help="Path to save RO Crate zip") +def rocrate(pipeline_dir, json, zip): + """ + Make an Research Object Crate + """ + import nf_core.ro_crate + + if json is None and zip is None: + log.error("Either --json or --zip must be specified") + sys.exit(1) + pipeline_dir = Path(pipeline_dir) + rocrate_obj = nf_core.ro_crate.RoCrate(pipeline_dir) + rocrate_obj.create_ro_crate(pipeline_dir, metadata_fn=json, zip_fn=zip) + + # Main script is being run - launch the CLI if __name__ == "__main__": run_nf_core() diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index abdb926e6f..fe8bc6a998 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -7,8 +7,12 @@ from pathlib import Path from typing import Union +import requests import rocrate.model.entity import rocrate.rocrate +from rocrate.model.person import Person + +from nf_core.utils import Pipeline log = logging.getLogger(__name__) @@ -16,31 +20,178 @@ class RoCrate: """Class to generate an RO Crate for a pipeline""" - def __init__(self, pipeline_dir: Union[str, Path], version=""): + def __init__(self, pipeline_dir: Path, version=""): + from nf_core.utils import is_pipeline_directory + + is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir self.version = version + self.crate: rocrate.rocrate.ROCrate + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() - def create_ro_create(self, outdir: Path, metadata_fn="", zip_fn=""): + def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): """Create an RO Crate for the pipeline""" - # Create a temporary directory for the RO Crate - rocrate_dir = tempfile.mkdtemp(prefix="nf-core-ro-crate-") + # Set input paths + self.get_crate_paths(outdir) + + self.make_workflow_ro_crate(self.pipeline_dir) + + # Save just the JSON metadata file + if metadata_fn is not None: + log.info(f"Saving metadata file '{metadata_fn}'") + # Save the crate to a temporary directory + tmpdir = Path(tempfile.mkdtemp(), "wf") + self.crate.write(tmpdir) + # Now save just the JSON file + crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") + crate_json_fn.rename(metadata_fn) + + # Save the whole crate zip file + if zip_fn is not None: + log.info(f"Saving zip file '{zip_fn}'") + self.crate.write_zip(zip_fn) + + def make_workflow_ro_crate(self, 
path: Path): + import nf_core.utils + + if self.pipeline_obj is None: + raise ValueError("Pipeline object not loaded") # Create the RO Crate - wf_crate = rocrate.rocrate.ROCrate(rocrate_dir) + self.crate = rocrate.rocrate.ROCrate() - # Set main entity file - wf_file = wf_crate.add_file(Path(self.pipeline_dir, "nextflow.config"), "nextflow.config") - wf_crate.mainEntity = wf_file + # Conform to RO-Crate 1.1 and workflowhub-ro-crate # Set language type programming_language = rocrate.model.entity.Entity( - wf_crate, - "https://www.nextflow.io/", + self.crate, + "#nextflow", properties={ "@type": ["ComputerLanguage", "SoftwareApplication"], "name": "Nextflow", "url": "https://www.nextflow.io/", + "identifier": "https://www.nextflow.io/", + "version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), }, ) - wf_crate.add(programming_language) + self.crate.add(programming_language) + self.crate.update_jsonld( + { + "@id": "ro-crate-metadata.json", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate/1.1"}, + {"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}, + ], + } + ) + + # Set main entity file + wf_file = self.crate.add_jsonld( + { + "@id": "main.nf", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + }, + ) + self.crate.mainEntity = wf_file + # self.crate.update_jsonld({"@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}) + + self.add_authors(wf_file) + wf_file.append_to("programmingLanguage", programming_language) + + # add readme as description + readme = Path(self.pipeline_dir, "README.md") + self.crate.description = readme.read_text() + + self.crate.license = "MIT" + + # add doi as identifier + # self.crate.identifier = self.pipeline_obj.get("manifest", {}).get("doi", "") + self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' + + if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): + self.crate.CreativeWorkStatus = "InProgress" + else: + self.crate.CreativeWorkStatus = "Stable" + + # Add all other files + wf_filenames = nf_core.utils.get_wf_files(self.pipeline_dir) + log.debug(f"Adding {len(wf_filenames)} workflow files") + for fn in wf_filenames: + # check if it wasn't already added + if fn == "main.nf": + continue + # add nextflow language to .nf and .config files + if fn.endswith(".nf") or fn.endswith(".config"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + if fn.endswith(".png"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"@type": ["File", "ImageObject"]}) + if "metro_map" in fn: + log.info(f"Setting main entity image to: {fn}") + wf_file.append_to("image", {"@id": fn}) + if fn.endswith(".md"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + else: + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn) + + # Add keywords from github topics + + def add_authors(self, wf_file): + """ + Add workflow authors to the crate + NB: We don't have much metadata here - scope to improve in the future + """ + # add author entity to crate + + try: + authors = self.pipeline_obj.nf_config["manifest.author"].split(",") + except KeyError: + log.error("No author field found in manifest of nextflow.config") + return + for author in authors: + log.debug(f"Adding author: {author}") + orcid = get_orcid(author) + author_entitity = self.crate.add(Person(self.crate, 
orcid, properties={"name": author})) + wf_file.append_to("author", author_entitity) + + def get_crate_paths(self, path): + """Given a pipeline name, directory, or path, set wf_crate_filename""" + + path = Path(path) + + if path.is_dir(): + self.pipeline_dir = path + # wf_crate_filename = path / "ro-crate-metadata.json" + elif path.is_file(): + self.pipeline_dir = path.parent + # wf_crate_filename = path + + # Check that the schema file exists + if self.pipeline_dir is None: + raise OSError(f"Could not find pipeline '{path}'") + + +def get_orcid(name: str) -> Union[str, None]: + base_url = "https://pub.orcid.org/v3.0/search/" + headers = { + "Accept": "application/json", + } + params = {"q": f'family-name:"{name.split()[-1]}" AND given-names:"{name.split()[0]}"'} + response = requests.get(base_url, params=params, headers=headers) + + if response.status_code == 200: + json_response = response.json() + if json_response.get("num-found") == 1: + orcid_uri = json_response.get("result")[0].get("orcid-identifier", {}).get("uri") + log.info(f"Using found ORCID for {name}. Please double-check: {orcid_uri}") + return orcid_uri + else: + log.debug(f"No exact ORCID found for {name}. See {response.url}") + return None + else: + return f"API request unsuccessful. Status code: {response.status_code}" diff --git a/nf_core/schema.py b/nf_core/schema.py index b0c5dc04b6..5b68556fb1 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -42,7 +42,7 @@ def __init__(self): self.schema_from_scratch = False self.no_prompts = False self.web_only = False - self.web_schema_build_url = "https://nf-co.re/pipeline_schema_builder" + self.web_schema_build_url = "https://oldsite.nf-co.re/pipeline_schema_builder" self.web_schema_build_web_url = None self.web_schema_build_api_url = None @@ -633,7 +633,7 @@ def build_schema(self, pipeline_dir, no_prompts, web_only, url): # Extra help for people running offline if "Could not connect" in e.args[0]: log.info( - "If you're working offline, now copy your schema ({}) and paste at https://nf-co.re/pipeline_schema_builder".format( + "If you're working offline, now copy your schema ({}) and paste at https://oldsite.nf-co.re/pipeline_schema_builder".format( self.schema_filename ) ) @@ -853,6 +853,7 @@ def launch_web_builder(self): """ Send pipeline schema to web builder and wait for response """ + content = { "post_content": "json_schema", "api": "true", @@ -861,6 +862,7 @@ def launch_web_builder(self): "schema": json.dumps(self.schema), } web_response = nf_core.utils.poll_nfcore_web_api(self.web_schema_build_url, content) + try: if "api_url" not in web_response: raise AssertionError('"api_url" not in web_response') diff --git a/nf_core/utils.py b/nf_core/utils.py index d2b5fa9a5c..ef70d42b69 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -4,6 +4,7 @@ import concurrent.futures import datetime import errno +import fnmatch import hashlib import io import json @@ -40,14 +41,29 @@ [ ("qmark", "fg:ansiblue bold"), # token in front of the question ("question", "bold"), # question text - ("answer", "fg:ansigreen nobold bg:"), # submitted answer text behind the question - ("pointer", "fg:ansiyellow bold"), # pointer used in select and checkbox prompts - ("highlighted", "fg:ansiblue bold"), # pointed-at choice in select and checkbox prompts - ("selected", "fg:ansiyellow noreverse bold"), # style for a selected item of a checkbox + ( + "answer", + "fg:ansigreen nobold bg:", + ), # submitted answer text behind the question + ( + "pointer", + "fg:ansiyellow bold", + ), # pointer used 
in select and checkbox prompts + ( + "highlighted", + "fg:ansiblue bold", + ), # pointed-at choice in select and checkbox prompts + ( + "selected", + "fg:ansiyellow noreverse bold", + ), # style for a selected item of a checkbox ("separator", "fg:ansiblack"), # separator in lists ("instruction", ""), # user instructions for select, rawselect, checkbox ("text", ""), # plain text - ("disabled", "fg:gray italic"), # disabled choices for select and checkbox prompts + ( + "disabled", + "fg:gray italic", + ), # disabled choices for select and checkbox prompts ("choice-default", "fg:ansiblack"), ("choice-default-changed", "fg:ansiyellow"), ("choice-required", "fg:ansired"), @@ -58,7 +74,10 @@ os.environ.get("XDG_CACHE_HOME", os.path.join(os.getenv("HOME") or "", ".cache")), "nfcore", ) -NFCORE_DIR = os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME") or "", ".config")), "nfcore") +NFCORE_DIR = os.path.join( + os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME") or "", ".config")), + "nfcore", +) def fetch_remote_version(source_url): @@ -67,7 +86,11 @@ def fetch_remote_version(source_url): return remote_version -def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"): +def check_if_outdated( + current_version=None, + remote_version=None, + source_url="https://nf-co.re/tools_version", +): """ Check if the current version of nf-core is outdated """ @@ -407,6 +430,7 @@ def poll_nfcore_web_api(api_url, post_data=None): if post_data is None: response = requests.get(api_url, headers={"Cache-Control": "no-cache"}) else: + log.debug(f"requesting {api_url} with {post_data}") response = requests.post(url=api_url, data=post_data) except requests.exceptions.Timeout: raise AssertionError(f"URL timed out: {api_url}") @@ -482,7 +506,8 @@ def __call__(self, r): with open(gh_cli_config_fn) as fh: gh_cli_config = yaml.safe_load(fh) self.auth = requests.auth.HTTPBasicAuth( - gh_cli_config["github.com"]["user"], gh_cli_config["github.com"]["oauth_token"] + gh_cli_config["github.com"]["user"], + gh_cli_config["github.com"]["oauth_token"], ) self.auth_mode = f"gh CLI config: {gh_cli_config['github.com']['user']}" except Exception: @@ -747,12 +772,18 @@ def get_tag_date(tag_date): # Obtain version and build match = re.search(r"(?::)+([A-Za-z\d\-_.]+)", img["image_name"]) if match is not None: - all_docker[match.group(1)] = {"date": get_tag_date(img["updated"]), "image": img} + all_docker[match.group(1)] = { + "date": get_tag_date(img["updated"]), + "image": img, + } elif img["image_type"] == "Singularity": # Obtain version and build match = re.search(r"(?::)+([A-Za-z\d\-_.]+)", img["image_name"]) if match is not None: - all_singularity[match.group(1)] = {"date": get_tag_date(img["updated"]), "image": img} + all_singularity[match.group(1)] = { + "date": get_tag_date(img["updated"]), + "image": img, + } # Obtain common builds from Docker and Singularity images common_keys = list(all_docker.keys() & all_singularity.keys()) current_date = None @@ -878,13 +909,19 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): # Releases if len(wf_releases) > 0: for tag in map(lambda release: release.get("tag_name"), wf_releases): - tag_display = [("fg:ansiblue", f"{tag} "), ("class:choice-default", "[release]")] + tag_display = [ + ("fg:ansiblue", f"{tag} "), + ("class:choice-default", "[release]"), + ] choices.append(questionary.Choice(title=tag_display, value=tag)) tag_set.append(tag) # Branches for 
branch in wf_branches.keys(): - branch_display = [("fg:ansiyellow", f"{branch} "), ("class:choice-default", "[branch]")] + branch_display = [ + ("fg:ansiyellow", f"{branch} "), + ("class:choice-default", "[branch]"), + ] choices.append(questionary.Choice(title=branch_display, value=branch)) tag_set.append(branch) @@ -915,7 +952,8 @@ def validate(self, value): return True else: raise questionary.ValidationError( - message="Invalid remote cache index file", cursor_position=len(value.text) + message="Invalid remote cache index file", + cursor_position=len(value.text), ) else: return True @@ -945,7 +983,13 @@ def get_repo_releases_branches(pipeline, wfs): pipeline = wf.full_name # Store releases and stop loop - wf_releases = list(sorted(wf.releases, key=lambda k: k.get("published_at_timestamp", 0), reverse=True)) + wf_releases = list( + sorted( + wf.releases, + key=lambda k: k.get("published_at_timestamp", 0), + reverse=True, + ) + ) break # Arbitrary GitHub repo @@ -965,7 +1009,13 @@ def get_repo_releases_branches(pipeline, wfs): raise AssertionError(f"Not able to find pipeline '{pipeline}'") except AttributeError: # Success! We have a list, which doesn't work with .get() which is looking for a dict key - wf_releases = list(sorted(rel_r.json(), key=lambda k: k.get("published_at_timestamp", 0), reverse=True)) + wf_releases = list( + sorted( + rel_r.json(), + key=lambda k: k.get("published_at_timestamp", 0), + reverse=True, + ) + ) # Get release tag commit hashes if len(wf_releases) > 0: @@ -1192,3 +1242,21 @@ def set_wd(path: Path) -> Generator[None, None, None]: yield finally: os.chdir(start_wd) + + +def get_wf_files(wf_path: Path): + """Return a list of all files in a directory (ignores .gitigore files)""" + + wf_files = [] + + with open(Path(wf_path, ".gitignore")) as f: + lines = f.read().splitlines() + ignore = [line for line in lines if line and not line.startswith("#")] + + for path in Path(wf_path).rglob("*"): + if any(fnmatch.fnmatch(str(path), pattern) for pattern in ignore): + continue + if path.is_file(): + wf_files.append(str(path)) + + return wf_files From ddaacbdcc219834988f5125d7c8ee1f6817b4d6f Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 24 Jan 2024 09:13:00 +0000 Subject: [PATCH 04/65] [automated] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acafa5e7bc..266f36f825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ - Add new subcommand: `nf-core logo-create` to output an nf-core logo for a pipeline (instead of going through the website) ([#2662](https://github.com/nf-core/tools/pull/2662)) - Update actions/cache action to v4 ([#2666](https://github.com/nf-core/tools/pull/2666)) - Remove redundanct v in pipeline version for emails ([#2667](https://github.com/nf-core/tools/pull/2667)) +- Add new command `nf-core rocrate` to create a Research Object (RO) crate for a pipeline ([#2680](https://github.com/nf-core/tools/pull/2680)) # [v2.11.1 - Magnesium Dragon Patch](https://github.com/nf-core/tools/releases/tag/2.11) - [2023-12-20] From ffcdc6f65cc78ba997ae158d6c4837558d30f7d9 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 10:49:37 +0100 Subject: [PATCH 05/65] fix types --- nf_core/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nf_core/utils.py b/nf_core/utils.py index d09639fc13..5491f2ad8c 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -155,8 +155,8 @@ def __init__(self, wf_path): self.git_sha = None 
self.minNextflowVersion = None self.wf_path = wf_path - self.pipeline_name = None - self.pipeline_prefix = None + self.pipeline_name = "" + self.pipeline_prefix = "" self.schema_obj = None try: From a87f6aa5dd9065da0feb6a027ba2d3cdf085d1d9 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 17:45:30 +0100 Subject: [PATCH 06/65] fix types --- nf_core/components/components_command.py | 11 +++++++---- nf_core/components/info.py | 9 +++++---- nf_core/modules/modules_json.py | 17 ++++++++++------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/nf_core/components/components_command.py b/nf_core/components/components_command.py index 8332429835..77cb484fc3 100644 --- a/nf_core/components/components_command.py +++ b/nf_core/components/components_command.py @@ -246,10 +246,13 @@ def check_patch_paths(self, patch_path: Path, module_name: str) -> None: modules_json = ModulesJson(self.dir) modules_json.load() if modules_json.has_git_url_and_modules(): - modules_json.modules_json["repos"][self.modules_repo.remote_url]["modules"][ - self.modules_repo.repo_path - ][module_name]["patch"] = str(patch_path.relative_to(Path(self.dir).resolve())) - modules_json.dump() + if modules_json.modules_json is not None: + modules_json.modules_json["repos"][self.modules_repo.remote_url]["modules"][ + self.modules_repo.repo_path + ][module_name]["patch"] = str(patch_path.relative_to(Path(self.dir).resolve())) + modules_json.dump() + else: + log.error("Could not update modules.json file.") def check_if_in_include_stmts(self, component_path: str) -> Dict[str, List[Dict[str, Union[int, str]]]]: """ diff --git a/nf_core/components/info.py b/nf_core/components/info.py index 54fc0004dc..e692cadea8 100644 --- a/nf_core/components/info.py +++ b/nf_core/components/info.py @@ -1,6 +1,7 @@ import logging import os from pathlib import Path +from typing import Union import questionary import yaml @@ -65,7 +66,7 @@ def __init__( no_pull=False, ): super().__init__(component_type, pipeline_dir, remote_url, branch, no_pull) - self.meta = None + self.meta = {} self.local_path = None self.remote_location = None self.local = None @@ -162,7 +163,7 @@ def get_component_info(self): return self.generate_component_info_help() - def get_local_yaml(self): + def get_local_yaml(self) -> dict: """Attempt to get the meta.yml file from a locally installed module/subworkflow. Returns: @@ -201,9 +202,9 @@ def get_local_yaml(self): return yaml.safe_load(fh) log.debug(f"{self.component_type[:-1].title()} '{self.component}' meta.yml not found locally") - return None + return {} - def get_remote_yaml(self): + def get_remote_yaml(self) -> Union[dict, bool]: """Attempt to get the meta.yml file from a remote repo. 
Returns: diff --git a/nf_core/modules/modules_json.py b/nf_core/modules/modules_json.py index f68c27b2d8..6042057139 100644 --- a/nf_core/modules/modules_json.py +++ b/nf_core/modules/modules_json.py @@ -43,7 +43,7 @@ def __init__(self, pipeline_dir): self.modules_dir = Path(self.dir, "modules") self.subworkflows_dir = Path(self.dir, "subworkflows") self.modules_json_path = Path(self.dir, "modules.json") - self.modules_json: Union(dict, None) = None + self.modules_json: Union[dict, None] = None self.pipeline_modules = None self.pipeline_subworkflows = None self.pipeline_components = None @@ -1056,12 +1056,15 @@ def dump(self, run_prettier: bool = False): Sort the modules.json, and write it to file """ # Sort the modules.json - self.modules_json["repos"] = nf_core.utils.sort_dictionary(self.modules_json["repos"]) - if run_prettier: - dump_json_with_prettier(self.modules_json_path, self.modules_json) - else: - with open(self.modules_json_path, "w") as fh: - json.dump(self.modules_json, fh, indent=4) + if self.modules_json is None: + self.load() + if self.modules_json is not None: + self.modules_json["repos"] = nf_core.utils.sort_dictionary(self.modules_json["repos"]) + if run_prettier: + dump_json_with_prettier(self.modules_json_path, self.modules_json) + else: + with open(self.modules_json_path, "w") as fh: + json.dump(self.modules_json, fh, indent=4) def resolve_missing_installation(self, missing_installation, component_type): missing_but_in_mod_json = [ From 63cccc8a54bc62d63faec3349363bd1ff243f9b8 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 17:46:03 +0100 Subject: [PATCH 07/65] use git to get all workflow files --- nf_core/utils.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/nf_core/utils.py b/nf_core/utils.py index 5491f2ad8c..7eb3741f15 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -4,7 +4,6 @@ import concurrent.futures import datetime import errno -import fnmatch import hashlib import io import json @@ -1108,7 +1107,7 @@ def get_first_available_path(directory, paths): return None -def sort_dictionary(d): +def sort_dictionary(d: dict) -> dict: """Sorts a nested dictionary recursively""" result = {} for k, v in sorted(d.items()): @@ -1253,17 +1252,13 @@ def set_wd(path: Path) -> Generator[None, None, None]: def get_wf_files(wf_path: Path): """Return a list of all files in a directory (ignores .gitigore files)""" + from git import InvalidGitRepositoryError, Repo wf_files = [] - - with open(Path(wf_path, ".gitignore")) as f: - lines = f.read().splitlines() - ignore = [line for line in lines if line and not line.startswith("#")] - - for path in Path(wf_path).rglob("*"): - if any(fnmatch.fnmatch(str(path), pattern) for pattern in ignore): - continue - if path.is_file(): - wf_files.append(str(path.relative_to(wf_path))) + try: + repo = Repo(wf_path) + wf_files = [str(f) for f in repo.git.ls_files().split("\n")] + except InvalidGitRepositoryError: + log.error(f"Could not find git repository at {wf_path}") return wf_files From e65248ec5d65b96187409eea059cba5529dbbe70 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 17:47:06 +0100 Subject: [PATCH 08/65] restructure code and add tests --- nf_core/rocrate.py | 187 +++++++++++++++++++++++++++++++----------- tests/test_rocrate.py | 86 +++++++++++++++++++ 2 files changed, 223 insertions(+), 50 deletions(-) create mode 100644 tests/test_rocrate.py diff --git a/nf_core/rocrate.py b/nf_core/rocrate.py index e565375e58..dde221e121 100644 --- a/nf_core/rocrate.py +++ 
b/nf_core/rocrate.py @@ -11,6 +11,7 @@ import requests import rocrate.model.entity import rocrate.rocrate +from git import GitCommandError, InvalidGitRepositoryError, Repo from rocrate.model.person import Person from nf_core.utils import Pipeline @@ -19,10 +20,17 @@ class RoCrate: - """Class to generate an RO Crate for a pipeline""" + """ + Class to generate an RO Crate for a pipeline + + Args: + pipeline_dir (Path): Path to the pipeline directory + version (str): Version of the pipeline to use + + """ def __init__(self, pipeline_dir: Path, version=""): - from nf_core.utils import is_pipeline_directory + from nf_core.utils import is_pipeline_directory, setup_requests_cachedir try: is_pipeline_directory(pipeline_dir) @@ -36,13 +44,55 @@ def __init__(self, pipeline_dir: Path, version=""): self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() - def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): - """Create an RO Crate for the pipeline""" + setup_requests_cachedir() + + def create_ro_crate( + self, outdir: Path, metadata_fn: Union[str, None, Path] = None, zip_fn: Union[str, None] = None + ) -> None: + """ + Create an RO Crate for a pipeline + + Args: + outdir (Path): Path to the output directory + metadata_fn (str): Filename for the metadata file + zip_fn (str): Filename for the zip file + + """ + import os # Set input paths - self.get_crate_paths(outdir) + try: + self.set_crate_paths(outdir) + except OSError as e: + log.error(e) + sys.exit(1) - self.make_workflow_ro_crate(self.pipeline_dir) + # Change to the pipeline directory, because the RO Crate doesn't handle relative paths well + current_path = Path.cwd() + os.chdir(self.pipeline_dir) + + # Check that the checkout pipeline version is the same as the requested version + if self.version: + if self.version != self.pipeline_obj.nf_config.get("manifest.version"): + # using git checkout to get the requested version + log.info(f"Checking out pipeline version {self.version}") + try: + self.repo = Repo(self.pipeline_dir) + self.repo.git.checkout(self.version) + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() + except InvalidGitRepositoryError: + log.error(f"Could not find a git repository in {self.pipeline_dir}") + sys.exit(1) + except GitCommandError: + log.error(f"Could not checkout version {self.version}") + sys.exit(1) + + try: + self.make_workflow_ro_crate() + except Exception as e: + log.error(e) + sys.exit(1) # Save just the JSON metadata file if metadata_fn is not None: @@ -59,17 +109,22 @@ def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): log.info(f"Saving zip file '{zip_fn}'") self.crate.write_zip(zip_fn) - def make_workflow_ro_crate(self, path: Path): - import nf_core.utils + # Change back to the original directory + os.chdir(current_path) + + def make_workflow_ro_crate(self) -> None: + """ + Create an RO Crate for a pipeline + Args: + path (Path): Path to the pipeline directory + """ if self.pipeline_obj is None: raise ValueError("Pipeline object not loaded") - # Create the RO Crate + # Create the RO Crate object self.crate = rocrate.rocrate.ROCrate() - # Conform to RO-Crate 1.1 and workflowhub-ro-crate - # Set language type programming_language = rocrate.model.entity.Entity( self.crate, @@ -83,6 +138,8 @@ def make_workflow_ro_crate(self, path: Path): }, ) self.crate.add(programming_language) + + # Conform to RO-Crate 1.1 and workflowhub-ro-crate self.crate.update_jsonld( { "@id": "ro-crate-metadata.json", @@ -94,26 +151,19 @@ def 
make_workflow_ro_crate(self, path: Path): ) # Set main entity file - wf_file = self.crate.add_jsonld( - { - "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - }, - ) - self.crate.mainEntity = wf_file - # self.crate.update_jsonld({"@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}) - - self.add_authors(wf_file) - wf_file.append_to("programmingLanguage", programming_language) + self.set_main_entity("main.nf") # add readme as description - readme = Path(self.pipeline_dir, "README.md") - self.crate.description = readme.read_text() + readme = Path("README.md") + + try: + self.crate.description = readme.read_text() + except FileNotFoundError: + log.error(f"Could not find README.md in {self.pipeline_dir}") self.crate.license = "MIT" # add doi as identifier - # self.crate.identifier = self.pipeline_obj.get("manifest", {}).get("doi", "") self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): @@ -122,30 +172,23 @@ def make_workflow_ro_crate(self, path: Path): self.crate.CreativeWorkStatus = "Stable" # Add all other files - wf_filenames = nf_core.utils.get_wf_files(self.pipeline_dir) - log.debug(f"Adding {len(wf_filenames)} workflow files") - for fn in wf_filenames: - # check if it wasn't already added - if fn == "main.nf": - continue - # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - if fn.endswith(".png"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"@type": ["File", "ImageObject"]}) - if "metro_map" in fn: - log.info(f"Setting main entity image to: {fn}") - wf_file.append_to("image", {"@id": fn}) - if fn.endswith(".md"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) - else: - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn) + self.add_workflow_files() - # get keywords from github topics + def set_main_entity(self, main_entity_filename: str): + """ + Set the main.nf as the main entity of the crate and add necessary metadata + """ + + wf_file = self.crate.add_jsonld( + { + "@id": main_entity_filename, + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + }, + ) + self.crate.mainEntity = wf_file + self.add_main_authors(wf_file) + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) + # get keywords from nf-core website remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] # go through all remote workflows and find the one that matches the pipeline name topics = ["nf-core", "nextflow"] @@ -157,7 +200,7 @@ def make_workflow_ro_crate(self, path: Path): log.debug(f"Adding topics: {topics}") wf_file.append_to("keywords", topics) - def add_authors(self, wf_file): + def add_main_authors(self, wf_file): """ Add workflow authors to the crate NB: We don't have much metadata here - scope to improve in the future @@ -175,7 +218,42 @@ def add_authors(self, wf_file): author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) wf_file.append_to("author", author_entitity) - def get_crate_paths(self, path): + def add_workflow_files(self): + """ + Add workflow files to the RO Crate + """ + import nf_core.utils + + wf_filenames = 
nf_core.utils.get_wf_files(Path.cwd()) + # exclude github action files + wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] + log.debug(f"Adding {len(wf_filenames)} workflow files") + for fn in wf_filenames: + # skip main.nf + if fn == "main.nf": + continue + # add nextflow language to .nf and .config files + if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + continue + if fn.endswith(".png"): + log.debug(f"Adding workflow image file: {fn}") + self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) + if "metro_map" in fn: + log.info(f"Setting main entity image to: {fn}") + self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) + continue + if fn.endswith(".md"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + continue + else: + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn) + continue + + def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" path = Path(path) @@ -193,6 +271,15 @@ def get_crate_paths(self, path): def get_orcid(name: str) -> Union[str, None]: + """ + Get the ORCID for a given name + + Args: + name (str): Name of the author + + Returns: + str: ORCID URI or None + """ base_url = "https://pub.orcid.org/v3.0/search/" headers = { "Accept": "application/json", diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py new file mode 100644 index 0000000000..3db1876c04 --- /dev/null +++ b/tests/test_rocrate.py @@ -0,0 +1,86 @@ +""" Test the nf-core rocrate command """ + + +import shutil +import tempfile +import unittest +from pathlib import Path + +import rocrate.rocrate +from git import Repo + +import nf_core.create +import nf_core.rocrate +import nf_core.utils + + +class TestROCrate(unittest.TestCase): + """Class for lint tests""" + + def setUp(self): + """Function that runs at start of tests for common resources + + Use nf_core.create() to make a pipeline that we can use for testing + """ + + self.tmp_dir = Path(tempfile.mkdtemp()) + self.test_pipeline_dir = Path(self.tmp_dir, "nf-core-testpipeline") + self.create_obj = nf_core.create.PipelineCreate( + name="testpipeline", + description="This is a test pipeline", + author="Test McTestFace", + outdir=self.test_pipeline_dir, + version="1.0.0", + no_git=False, + force=True, + plain=True, + ) + self.create_obj.init_pipeline() + + # add fake metro map + Path(self.test_pipeline_dir, "docs", "images", "nf-core-testpipeline_metro_map.png").touch() + # commit the changes + repo = Repo(self.test_pipeline_dir) + repo.git.add(A=True) + repo.index.commit("Initial commit") + + def tearDown(self): + """Clean up temporary files and folders""" + + if self.tmp_dir.exists(): + shutil.rmtree(self.tmp_dir) + + def test_rocrate_creation(self): + """Run the nf-core rocrate command""" + + # Run the command + self.rocrate_obj = nf_core.rocrate.RoCrate(self.test_pipeline_dir) + self.rocrate_obj.create_ro_crate( + self.test_pipeline_dir, metadata_fn=Path(self.test_pipeline_dir, "ro-crate-metadata.json") + ) + + # Check that the crate was created + self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists()) + + # Check that the entries in the crate are correct + crate = rocrate.rocrate.ROCrate(str(self.test_pipeline_dir)) + entities = crate.get_entities() + + # Check if the 
correct entities are set: + for entity in entities: + entity_json = entity.as_jsonld() + if entity_json["@id"] == "./": + self.assertEqual(entity_json.get("name"), "Research Object Crate for nf-core/testpipeline") + self.assertEqual(entity_json["mainEntity"], {"@id": "#main.nf"}) + elif entity_json["@id"] == "#main.nf": + self.assertEqual(entity_json["programmingLanguage"], [{"@id": "#nextflow"}]) + self.assertEqual(entity_json["image"], [{"@id": "nf-core-testpipeline_metro_map.png"}]) + # assert there is a metro map + # elif entity_json["@id"] == "nf-core-testpipeline_metro_map.png": # FIXME waiting for https://github.com/ResearchObject/ro-crate-py/issues/174 + # self.assertEqual(entity_json["@type"], ["File", "ImageObject"]) + # assert that author is set as a person + elif "name" in entity_json and entity_json["name"] == "Test McTestFace": + self.assertEqual(entity_json["@type"], "Person") + # check that it is set as author of the main entity + if crate.mainEntity is not None: + self.assertEqual(crate.mainEntity["author"][0].id, entity_json["@id"]) From 1210d1e63d64556a80c44a0b7120fc1011ef0328 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 18:13:27 +0100 Subject: [PATCH 09/65] add missing dep --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index e4319d2352..436c193cb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,6 @@ requests requests_cache rich-click>=1.6.1 rich>=13.3.1 +rocrate tabulate trogon From 83d22170e477679b067817f0a556f994eccce07c Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 11:46:08 +0100 Subject: [PATCH 10/65] add documentation --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index c612c442b8..754f12af9d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ A python package with helper tools for the nf-core community. - [`nf-core schema` - Work with pipeline schema files](#pipeline-schema) - [`nf-core bump-version` - Update nf-core pipeline version number](#bumping-a-pipeline-version-number) - [`nf-core sync` - Synchronise pipeline TEMPLATE branches](#sync-a-pipeline-with-the-template) +- [`nf-core rocrate` - Create a Research Object (RO) crate metadata file](#create-a-ro-crate-metadata-file) - [`nf-core tui` - Explore the nf-core command line graphically](#tools-cli-tui) - [`nf-core modules` - commands for dealing with DSL2 modules](#modules) @@ -732,6 +733,24 @@ To create the pull request, a personal access token is required for API authenti These can be created at [https://github.com/settings/tokens](https://github.com/settings/tokens). Supply this using the `--auth-token` flag. +## Create a RO-Crate Metadata File + +The [Research Object Crate (RO-Crate)](https://www.researchobject.org/ro-crate/) is a community-driven specification for packaging research data with associated metadata and their relationships. The RO-Crate metadata file is a JSON-LD file that describes the research data and its relationships. The RO-Crate metadata file can be used to generate a RO-Crate, which is a single file that contains the research data and its metadata. The RO-Crate can be used to share research data and its metadata with other researchers and to make research data FAIR (Findable, Accessible, Interoperable, and Reusable). + +The `nf-core crate` command creates a RO-Crate metadata file for a nf-core pipeline. 
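For illustration, a minimal sketch of driving the same feature from Python rather than the CLI, using the class and argument names as they stand by the end of this patch series (`ROCrate` in `nf_core.pipelines.rocrate`, `metadata_path`/`zip_path`); the pipeline path below is a placeholder:

```python
from pathlib import Path

from nf_core.pipelines.rocrate import ROCrate  # module path after the later refactor in this series

# Placeholder: a local clone of an nf-core pipeline. Absolute paths are safest,
# because crate creation temporarily changes the working directory.
pipeline_dir = Path("nf-core-testpipeline").resolve()

# Optionally pass a released version; the tool will `git checkout` that ref before building the crate.
ro_crate = ROCrate(pipeline_dir, version="1.0.0")

# Writes <pipeline_dir>/ro-crate-metadata.json and <pipeline_dir>/ro-crate.crate.zip
ro_crate.create_ro_crate(pipeline_dir, metadata_path=pipeline_dir, zip_path=pipeline_dir)
```

The equivalent CLI call, once the command moves under `nf-core pipelines` later in the series, is `nf-core pipelines ro-crate <pipeline_dir> --json_path <dir> --zip_path <dir> --pipeline_version <version>`.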
+ + + +![`nf-core crate`](docs/images/nf-core-crate.svg) + +> [!NOTE] +> We make the following assumptions about the pipeline: +> +> - If the pipeline has one of the famous nf-core metro maps, we assume that it is located in `docs/images` and contains `metro-map` in its filename. This will be used as the workflow graph in the RO-Crate. +> - Based on the names in the `manifest.author` inside the `nextflow.config` file, we add the [ORCID](https://orcid.org/) IDs of the authors to the RO-Crate metadata file (based on very simple assumpations). Please double check the ORCID IDs after creating the RO-Crate metadata file (see log) and add missing ORCID IDs manually. + ## Tools CLI TUI _CLI:_ Command line interface From b505a62c48b60fd1e91529366746e4a3a2dc760b Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 12:27:32 +0100 Subject: [PATCH 11/65] add recommendations from comments --- README.md | 4 ++-- nf_core/__main__.py | 33 +++++++++++++++++++------- nf_core/rocrate.py | 57 +++++++++++++++++++++++++-------------------- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 754f12af9d..6b4903250b 100644 --- a/README.md +++ b/README.md @@ -748,8 +748,8 @@ working_dir: tmp/nf-core-nextbigthing > [!NOTE] > We make the following assumptions about the pipeline: > -> - If the pipeline has one of the famous nf-core metro maps, we assume that it is located in `docs/images` and contains `metro-map` in its filename. This will be used as the workflow graph in the RO-Crate. -> - Based on the names in the `manifest.author` inside the `nextflow.config` file, we add the [ORCID](https://orcid.org/) IDs of the authors to the RO-Crate metadata file (based on very simple assumpations). Please double check the ORCID IDs after creating the RO-Crate metadata file (see log) and add missing ORCID IDs manually. +> - If the pipeline has one of the famous nf-core metro maps, we expect it to be a `.png` file stored in `docs/images`, with `metro_map` in its name, e.g., `nf-core-rnaseq_metro_map_grey.png`. This image will be set as the workflow graph in the RO-Crate. +> - Based on the names in the `manifest.author` inside the `nextflow.config` file, we add the [ORCID](https://orcid.org/) IDs of the authors to the RO-Crate metadata file (based on very simple assumpations). Please double-check the ORCID IDs after creating the RO-Crate metadata file (see log) and add missing ORCID IDs manually. 
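To make the ORCID caveat concrete, here is a rough sketch of the kind of name-based lookup the helper in this series performs against the public ORCID API; the search-query syntax and response fields below are assumptions about the public v3.0 endpoint rather than code taken from the patches:

```python
from typing import Optional

import requests

ORCID_SEARCH_URL = "https://pub.orcid.org/v3.0/search/"


def lookup_orcid(name: str) -> Optional[str]:
    """Best-effort ORCID lookup for a plain author name; returns an ORCID URI or None."""
    # Naive split: everything after the first space is treated as the family name.
    given, _, family = name.partition(" ")
    response = requests.get(
        ORCID_SEARCH_URL,
        params={"q": f'given-names:"{given}" AND family-name:"{family}"'},  # assumed query syntax
        headers={"Accept": "application/json"},
    )
    if response.status_code != 200:
        return None
    results = response.json().get("result") or []
    if len(results) != 1:
        # No single unambiguous match; the caller falls back to a plain name entity.
        return None
    return results[0]["orcid-identifier"]["uri"]
```

Because many researchers share a name, any identifier found this way is only a best guess, which is why the note above asks you to verify the generated author entries by hand.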
## Tools CLI TUI diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 0c39e55c45..f1c4ba2d2a 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -2136,24 +2136,39 @@ def sync(dir, from_branch, pull_request, github_repository, username, template_y ) @click.option( "-j", - "--json", - default="ro-crate-metadata.json", + "--json_path", + default=Path.cwd(), type=str, help="Path to save RO Crate metadata json", ) -@click.option("-z", "--zip", type=str, help="Path to save RO Crate zip") -def rocrate(pipeline_dir, json, zip): +@click.option("-z", "--zip_path", type=str, help="Path to save RO Crate zip") +@click.option( + "-pv", + "--pipeline_version", + type=str, + help="Version of pipeline to use for RO Crate", +) +def rocrate(pipeline_dir, json_path, zip_path, pipeline_version): """ Make an Research Object Crate """ import nf_core.rocrate - if json is None and zip is None: - log.error("Either --json or --zip must be specified") + if json_path is None and zip_path is None: + log.error("Either --json_path or --zip_path must be specified") sys.exit(1) - pipeline_dir = Path(pipeline_dir) - rocrate_obj = nf_core.rocrate.RoCrate(pipeline_dir) - rocrate_obj.create_ro_crate(pipeline_dir, metadata_fn=json, zip_fn=zip) + else: + pipeline_dir = Path(pipeline_dir) + if json_path is not None: + json_path = Path(json_path) + if zip_path is not None: + zip_path = Path(zip_path) + try: + rocrate_obj = nf_core.rocrate.RoCrate(pipeline_dir, pipeline_version) + rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) + except (UserWarning, LookupError, FileNotFoundError) as e: + log.error(e) + sys.exit(1) # Main script is being run - launch the CLI diff --git a/nf_core/rocrate.py b/nf_core/rocrate.py index dde221e121..d261de434f 100644 --- a/nf_core/rocrate.py +++ b/nf_core/rocrate.py @@ -32,11 +32,7 @@ class RoCrate: def __init__(self, pipeline_dir: Path, version=""): from nf_core.utils import is_pipeline_directory, setup_requests_cachedir - try: - is_pipeline_directory(pipeline_dir) - except UserWarning as e: - log.error(e) - sys.exit(1) + is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir self.version = version @@ -47,15 +43,15 @@ def __init__(self, pipeline_dir: Path, version=""): setup_requests_cachedir() def create_ro_crate( - self, outdir: Path, metadata_fn: Union[str, None, Path] = None, zip_fn: Union[str, None] = None + self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None ) -> None: """ Create an RO Crate for a pipeline Args: outdir (Path): Path to the output directory - metadata_fn (str): Filename for the metadata file - zip_fn (str): Filename for the zip file + metadata_path (Path): Path to the metadata file + zip_path (Path): Path to the zip file """ import os @@ -88,26 +84,29 @@ def create_ro_crate( log.error(f"Could not checkout version {self.version}") sys.exit(1) - try: - self.make_workflow_ro_crate() - except Exception as e: - log.error(e) - sys.exit(1) + self.make_workflow_ro_crate() # Save just the JSON metadata file - if metadata_fn is not None: - log.info(f"Saving metadata file '{metadata_fn}'") + if metadata_path is not None: + log.info(f"Saving metadata file '{metadata_path}'") # Save the crate to a temporary directory tmpdir = Path(tempfile.mkdtemp(), "wf") self.crate.write(tmpdir) # Now save just the JSON file crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") - crate_json_fn.rename(metadata_fn) + if metadata_path.name == "ro-crate-metadata.json": + 
crate_json_fn.rename(metadata_path) + else: + crate_json_fn.rename(metadata_path / "ro-crate-metadata.json") # Save the whole crate zip file - if zip_fn is not None: - log.info(f"Saving zip file '{zip_fn}'") - self.crate.write_zip(zip_fn) + if zip_path is not None: + if zip_path.name == "ro-crate.crate.zip": + log.info(f"Saving zip file '{zip_path}'") + self.crate.write_zip(zip_path) + else: + log.info(f"Saving zip file '{zip_path}/ro-crate.crate.zip;") + self.crate.write_zip(zip_path / "ro-crate.crate.zip") # Change back to the original directory os.chdir(current_path) @@ -115,9 +114,6 @@ def create_ro_crate( def make_workflow_ro_crate(self) -> None: """ Create an RO Crate for a pipeline - - Args: - path (Path): Path to the pipeline directory """ if self.pipeline_obj is None: raise ValueError("Pipeline object not loaded") @@ -160,8 +156,18 @@ def make_workflow_ro_crate(self) -> None: self.crate.description = readme.read_text() except FileNotFoundError: log.error(f"Could not find README.md in {self.pipeline_dir}") - - self.crate.license = "MIT" + # get license from LICENSE file + license_file = Path("LICENSE") + try: + license = license_file.read_text() + if license.startswith("MIT"): + self.crate.license = "MIT" + else: + # prompt for license + log.info("Could not determine license from LICENSE file") + self.crate.license = input("Please enter the license for this pipeline: ") + except FileNotFoundError: + log.error(f"Could not find LICENSE file in {self.pipeline_dir}") # add doi as identifier self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' @@ -297,4 +303,5 @@ def get_orcid(name: str) -> Union[str, None]: log.debug(f"No exact ORCID found for {name}. See {response.url}") return None else: - return f"API request unsuccessful. Status code: {response.status_code}" + log.info(f"API request to ORCID unsuccessful. 
Status code: {response.status_code}") + return None From e8959efe00802ca7de35d2db9f5bd63fe9a82b2b Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:03:17 +0100 Subject: [PATCH 12/65] add git contributors --- nf_core/rocrate.py | 28 ++++++++++++++++++++++++---- nf_core/utils.py | 4 ++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/nf_core/rocrate.py b/nf_core/rocrate.py index d261de434f..1da39d0d65 100644 --- a/nf_core/rocrate.py +++ b/nf_core/rocrate.py @@ -11,7 +11,7 @@ import requests import rocrate.model.entity import rocrate.rocrate -from git import GitCommandError, InvalidGitRepositoryError, Repo +from git import GitCommandError, InvalidGitRepositoryError from rocrate.model.person import Person from nf_core.utils import Pipeline @@ -73,8 +73,7 @@ def create_ro_crate( # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") try: - self.repo = Repo(self.pipeline_dir) - self.repo.git.checkout(self.version) + self.pipeline_obj.repo.git.checkout(self.version) self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() except InvalidGitRepositoryError: @@ -209,15 +208,36 @@ def set_main_entity(self, main_entity_filename: str): def add_main_authors(self, wf_file): """ Add workflow authors to the crate - NB: We don't have much metadata here - scope to improve in the future """ # add author entity to crate try: authors = self.pipeline_obj.nf_config["manifest.author"].split(",") + # remove spaces + authors = [a.strip() for a in authors] except KeyError: log.error("No author field found in manifest of nextflow.config") return + # look at git contributors for author names + try: + contributors = set() + + commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) + + for commit in commits_touching_path: + contributors.add(commit.author.name) + # exclude bots + contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] + # remove usernames (just keep names with spaces) + contributors = [c for c in contributors if " " in c] + + log.debug(f"Found {len(contributors)} git authors") + for git_author in contributors: + if git_author not in authors: + authors.append(git_author) + except AttributeError: + log.debug("Could not find git authors") + for author in authors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) diff --git a/nf_core/utils.py b/nf_core/utils.py index 7eb3741f15..998fa22fe2 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -159,8 +159,8 @@ def __init__(self, wf_path): self.schema_obj = None try: - repo = git.Repo(self.wf_path) - self.git_sha = repo.head.object.hexsha + self.repo = git.Repo(self.wf_path) + self.git_sha = self.repo.head.object.hexsha except Exception: log.debug(f"Could not find git hash for pipeline: {self.wf_path}") From c271c75e9bd248057a28b503533dd1484433c7fe Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:07:16 +0100 Subject: [PATCH 13/65] Add to files_exist linting --- nf_core/lint/files_exist.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index 117704d1f1..1bec9a7dc7 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -71,6 +71,7 @@ def files_exist(self): .github/workflows/awsfulltest.yml lib/WorkflowPIPELINE.groovy pyproject.toml + ro-crate-metadata.json Files that *must not* be present, due to being renamed or removed in the template: @@ -176,6 +177,7 @@ 
def files_exist(self): [os.path.join("lib", f"Workflow{short_name[0].upper()}{short_name[1:]}.groovy")], ["modules.json"], ["pyproject.toml"], + ["ro-crate-metadata.json"], ] # List of strings. Fails / warns if any of the strings exist. @@ -198,6 +200,9 @@ def files_exist(self): ] files_warn_ifexists = [".travis.yml"] + files_hint = [ + [["ro-crate-metadata.json"], "Run `nf-core rocrate to generate this file."], + ] # Remove files that should be ignored according to the linting config ignore_files = self.lint_config.get("files_exist", []) @@ -225,7 +230,11 @@ def pf(file_path): if any([os.path.isfile(pf(f)) for f in files]): passed.append(f"File found: {self._wrap_quotes(files)}") else: - warned.append(f"File not found: {self._wrap_quotes(files)}") + hint = "" + for file_hint in files_hint: + if file_hint[0] == files: + hint = +file_hint[1] + warned.append(f"File not found: {self._wrap_quotes(files)} {hint}") # Files that cause an error if they exist for file in files_fail_ifexists: From 02b2c4d8c2307f2a4e418d61c0f29583e404f0e0 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:16:35 +0100 Subject: [PATCH 14/65] fix tests --- nf_core/lint/files_exist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index 1bec9a7dc7..d9addd975e 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -233,7 +233,7 @@ def pf(file_path): hint = "" for file_hint in files_hint: if file_hint[0] == files: - hint = +file_hint[1] + hint = file_hint[1] warned.append(f"File not found: {self._wrap_quotes(files)} {hint}") # Files that cause an error if they exist From 43fe2996810e54db0b20d0507d13c5a0bbf2670a Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:28:04 +0100 Subject: [PATCH 15/65] fake ro-crate file for lint tests --- .github/workflows/create-lint-wf.yml | 2 ++ .github/workflows/create-test-lint-wf-template.yml | 2 ++ nf_core/lint/files_exist.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/create-lint-wf.yml b/.github/workflows/create-lint-wf.yml index dbee44654a..51992006df 100644 --- a/.github/workflows/create-lint-wf.yml +++ b/.github/workflows/create-lint-wf.yml @@ -60,6 +60,8 @@ jobs: mkdir create-lint-wf && cd create-lint-wf export NXF_WORK=$(pwd) nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --plain + # fake ro-crate + touch nf-core-testpipeline/nf-core-testpipeline.crate.json # Try syncing it before we change anything - name: nf-core sync diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 3805c1a240..6123dd5c88 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -92,6 +92,8 @@ jobs: run: | cd create-test-lint-wf nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} + # fake ro-crate + touch nf-core-testpipeline/nf-core-testpipeline.crate.json - name: run the pipeline run: | diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index d9addd975e..f6f20a98f9 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -201,7 +201,7 @@ def files_exist(self): files_warn_ifexists = [".travis.yml"] files_hint = [ - [["ro-crate-metadata.json"], "Run `nf-core rocrate to generate this file."], + 
[["ro-crate-metadata.json"], "Run `nf-core rocrate` to generate this file."], ] # Remove files that should be ignored according to the linting config ignore_files = self.lint_config.get("files_exist", []) From b0ed03829c4d1c8f726d9435e742eb49cc4f44d6 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:31:24 +0100 Subject: [PATCH 16/65] fix filename for CI --- .github/workflows/create-lint-wf.yml | 2 +- .github/workflows/create-test-lint-wf-template.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-lint-wf.yml b/.github/workflows/create-lint-wf.yml index 51992006df..c577a11e79 100644 --- a/.github/workflows/create-lint-wf.yml +++ b/.github/workflows/create-lint-wf.yml @@ -61,7 +61,7 @@ jobs: export NXF_WORK=$(pwd) nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --plain # fake ro-crate - touch nf-core-testpipeline/nf-core-testpipeline.crate.json + touch ro-crate-metadata.json # Try syncing it before we change anything - name: nf-core sync diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 6123dd5c88..6b805817bf 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -93,7 +93,7 @@ jobs: cd create-test-lint-wf nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} # fake ro-crate - touch nf-core-testpipeline/nf-core-testpipeline.crate.json + touch nf-core-testpipeline.crate.json - name: run the pipeline run: | From 24510be13a43480b6ff11a18c89ac553812fc19d Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:42:51 +0100 Subject: [PATCH 17/65] better help hint message --- .github/workflows/create-test-lint-wf-template.yml | 2 +- README.md | 6 +++--- nf_core/__main__.py | 4 ++-- nf_core/lint/files_exist.py | 5 ++++- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 6b805817bf..39246d9592 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -93,7 +93,7 @@ jobs: cd create-test-lint-wf nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} # fake ro-crate - touch nf-core-testpipeline.crate.json + touch ro-crate-metadata.json - name: run the pipeline run: | diff --git a/README.md b/README.md index 6b4903250b..d08d06277f 100644 --- a/README.md +++ b/README.md @@ -737,19 +737,19 @@ Supply this using the `--auth-token` flag. The [Research Object Crate (RO-Crate)](https://www.researchobject.org/ro-crate/) is a community-driven specification for packaging research data with associated metadata and their relationships. The RO-Crate metadata file is a JSON-LD file that describes the research data and its relationships. The RO-Crate metadata file can be used to generate a RO-Crate, which is a single file that contains the research data and its metadata. The RO-Crate can be used to share research data and its metadata with other researchers and to make research data FAIR (Findable, Accessible, Interoperable, and Reusable). -The `nf-core crate` command creates a RO-Crate metadata file for a nf-core pipeline. 
+The `nf-core rocrate` command creates a RO-Crate metadata file for a nf-core pipeline. -![`nf-core crate`](docs/images/nf-core-crate.svg) +![`nf-core rocrate`](docs/images/nf-core-rocrate.svg) > [!NOTE] > We make the following assumptions about the pipeline: > > - If the pipeline has one of the famous nf-core metro maps, we expect it to be a `.png` file stored in `docs/images`, with `metro_map` in its name, e.g., `nf-core-rnaseq_metro_map_grey.png`. This image will be set as the workflow graph in the RO-Crate. -> - Based on the names in the `manifest.author` inside the `nextflow.config` file, we add the [ORCID](https://orcid.org/) IDs of the authors to the RO-Crate metadata file (based on very simple assumpations). Please double-check the ORCID IDs after creating the RO-Crate metadata file (see log) and add missing ORCID IDs manually. +> - Based on the names in the `manifest.author` inside the `nextflow.config` file and the contributors to `main.nf` according to git, we add the [ORCID](https://orcid.org/) IDs of the authors to the RO-Crate metadata file (based on very simple assumpations). Please double-check the ORCID IDs after creating the RO-Crate metadata file (see log) and add missing ORCID IDs manually. ## Tools CLI TUI diff --git a/nf_core/__main__.py b/nf_core/__main__.py index f1c4ba2d2a..b13bb2641f 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -2152,7 +2152,7 @@ def rocrate(pipeline_dir, json_path, zip_path, pipeline_version): """ Make an Research Object Crate """ - import nf_core.rocrate + from nf_core.rocrate import RoCrate if json_path is None and zip_path is None: log.error("Either --json_path or --zip_path must be specified") @@ -2164,7 +2164,7 @@ def rocrate(pipeline_dir, json_path, zip_path, pipeline_version): if zip_path is not None: zip_path = Path(zip_path) try: - rocrate_obj = nf_core.rocrate.RoCrate(pipeline_dir, pipeline_version) + rocrate_obj = RoCrate(pipeline_dir, pipeline_version) rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) except (UserWarning, LookupError, FileNotFoundError) as e: log.error(e) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index f6f20a98f9..7192ecc19b 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -201,7 +201,10 @@ def files_exist(self): files_warn_ifexists = [".travis.yml"] files_hint = [ - [["ro-crate-metadata.json"], "Run `nf-core rocrate` to generate this file."], + [ + ["ro-crate-metadata.json"], + ". Run `nf-core rocrate` to generate this file. 
Read more about RO-Crates in the [nf-core/tools docs](https://nf-co.re/tools#create-a-ro-crate-metadata-file).", + ], ] # Remove files that should be ignored according to the linting config ignore_files = self.lint_config.get("files_exist", []) From 3bd0eaf6dcf5ba38dfcf590f234dc1f980cf0931 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 14:22:37 +0100 Subject: [PATCH 18/65] fix class name --- nf_core/__main__.py | 10 +++++----- nf_core/lint/__init__.py | 10 +++++----- nf_core/lint/files_exist.py | 2 +- nf_core/rocrate.py | 2 +- tests/test_rocrate.py | 8 +++----- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index b13bb2641f..15db853e59 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -2139,9 +2139,9 @@ def sync(dir, from_branch, pull_request, github_repository, username, template_y "--json_path", default=Path.cwd(), type=str, - help="Path to save RO Crate metadata json", + help="Path to save RO Crate metadata json file to", ) -@click.option("-z", "--zip_path", type=str, help="Path to save RO Crate zip") +@click.option("-z", "--zip_path", type=str, help="Path to save RO Crate zip file to") @click.option( "-pv", "--pipeline_version", @@ -2152,10 +2152,10 @@ def rocrate(pipeline_dir, json_path, zip_path, pipeline_version): """ Make an Research Object Crate """ - from nf_core.rocrate import RoCrate + from nf_core.rocrate import ROCrate if json_path is None and zip_path is None: - log.error("Either --json_path or --zip_path must be specified") + log.error("Either `--json_path` or `--zip_path` must be specified.") sys.exit(1) else: pipeline_dir = Path(pipeline_dir) @@ -2164,7 +2164,7 @@ def rocrate(pipeline_dir, json_path, zip_path, pipeline_version): if zip_path is not None: zip_path = Path(zip_path) try: - rocrate_obj = RoCrate(pipeline_dir, pipeline_version) + rocrate_obj = ROCrate(pipeline_dir, pipeline_version) rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) except (UserWarning, LookupError, FileNotFoundError) as e: log.error(e) diff --git a/nf_core/lint/__init__.py b/nf_core/lint/__init__.py index 797ebbcc91..b34b23d10f 100644 --- a/nf_core/lint/__init__.py +++ b/nf_core/lint/__init__.py @@ -382,7 +382,7 @@ def format_result(test_results): # Table of passed tests if len(self.passed) > 0 and show_passed: console.print( - rich.panel.Panel( + Panel( format_result(self.passed), title=rf"[bold][✔] {len(self.passed)} Pipeline Test{_s(self.passed)} Passed", title_align="left", @@ -394,7 +394,7 @@ def format_result(test_results): # Table of fixed tests if len(self.fixed) > 0: console.print( - rich.panel.Panel( + Panel( format_result(self.fixed), title=rf"[bold][?] {len(self.fixed)} Pipeline Test{_s(self.fixed)} Fixed", title_align="left", @@ -406,7 +406,7 @@ def format_result(test_results): # Table of ignored tests if len(self.ignored) > 0: console.print( - rich.panel.Panel( + Panel( format_result(self.ignored), title=rf"[bold][?] {len(self.ignored)} Pipeline Test{_s(self.ignored)} Ignored", title_align="left", @@ -418,7 +418,7 @@ def format_result(test_results): # Table of warning tests if len(self.warned) > 0: console.print( - rich.panel.Panel( + Panel( format_result(self.warned), title=rf"[bold][!] 
{len(self.warned)} Pipeline Test Warning{_s(self.warned)}", title_align="left", @@ -430,7 +430,7 @@ def format_result(test_results): # Table of failing tests if len(self.failed) > 0: console.print( - rich.panel.Panel( + Panel( format_result(self.failed), title=rf"[bold][✗] {len(self.failed)} Pipeline Test{_s(self.failed)} Failed", title_align="left", diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index 7192ecc19b..1efba0fbc9 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -257,7 +257,7 @@ def pf(file_path): else: passed.append(f"File not found check: {self._wrap_quotes(file)}") - # Files that are ignoed + # Files that are ignored for file in ignore_files: ignored.append(f"File is ignored: {self._wrap_quotes(file)}") diff --git a/nf_core/rocrate.py b/nf_core/rocrate.py index 1da39d0d65..3857460d0a 100644 --- a/nf_core/rocrate.py +++ b/nf_core/rocrate.py @@ -19,7 +19,7 @@ log = logging.getLogger(__name__) -class RoCrate: +class ROCrate: """ Class to generate an RO Crate for a pipeline diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index 3db1876c04..83911eab6e 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -54,16 +54,14 @@ def test_rocrate_creation(self): """Run the nf-core rocrate command""" # Run the command - self.rocrate_obj = nf_core.rocrate.RoCrate(self.test_pipeline_dir) - self.rocrate_obj.create_ro_crate( - self.test_pipeline_dir, metadata_fn=Path(self.test_pipeline_dir, "ro-crate-metadata.json") - ) + self.rocrate_obj = nf_core.rocrate.ROCrate(self.test_pipeline_dir) + self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) # Check that the crate was created self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists()) # Check that the entries in the crate are correct - crate = rocrate.rocrate.ROCrate(str(self.test_pipeline_dir)) + crate = rocrate.rocrate.ROCrate(self.test_pipeline_dir) entities = crate.get_entities() # Check if the correct entities are set: From 97d298a2ad4c169c6429f98e031a53f92418e917 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 14:24:08 +0100 Subject: [PATCH 19/65] add correct parent directory to faked crate --- .github/workflows/create-lint-wf.yml | 2 +- .github/workflows/create-test-lint-wf-template.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-lint-wf.yml b/.github/workflows/create-lint-wf.yml index c577a11e79..26cbcd64e9 100644 --- a/.github/workflows/create-lint-wf.yml +++ b/.github/workflows/create-lint-wf.yml @@ -61,7 +61,7 @@ jobs: export NXF_WORK=$(pwd) nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --plain # fake ro-crate - touch ro-crate-metadata.json + touch nf-core-testpipeline/ro-crate-metadata.json # Try syncing it before we change anything - name: nf-core sync diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 39246d9592..5eac6a1db0 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -93,7 +93,7 @@ jobs: cd create-test-lint-wf nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} # fake ro-crate - touch ro-crate-metadata.json + touch my-prefix-testpipeline/ro-crate-metadata.json - name: run the pipeline run: | From 
d6b7e6b295535c384e30b88665cbec20f665de89 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:11:27 +0100 Subject: [PATCH 20/65] Add a fake RO crate file to pass lint tests, rewrite with pathlib --- tests/test_lint.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 32913bda0d..172f63ba64 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -1,11 +1,10 @@ """Some tests covering the linting code. """ -import fnmatch import json -import os import shutil import tempfile import unittest +from pathlib import Path import yaml @@ -25,26 +24,29 @@ def setUp(self): """ self.tmp_dir = tempfile.mkdtemp() - self.test_pipeline_dir = os.path.join(self.tmp_dir, "nf-core-testpipeline") + self.test_pipeline_dir = Path(self.tmp_dir, "nf-core-testpipeline") self.create_obj = nf_core.create.PipelineCreate( "testpipeline", "This is a test pipeline", "Test McTestFace", outdir=self.test_pipeline_dir, plain=True ) self.create_obj.init_pipeline() + # Add a fake RO crate file + Path(self.test_pipeline_dir, "ro-crate-metadata.json").touch() + # Base lint object on this directory self.lint_obj = nf_core.lint.PipelineLint(self.test_pipeline_dir) def tearDown(self): """Clean up temporary files and folders""" - if os.path.exists(self.tmp_dir): + if Path(self.tmp_dir).exists(): shutil.rmtree(self.tmp_dir) def _make_pipeline_copy(self): """Make a copy of the test pipeline that can be edited Returns: Path to new temp directory with pipeline""" - new_pipeline = os.path.join(self.tmp_dir, "nf-core-testpipeline-copy") + new_pipeline = Path(self.tmp_dir, "nf-core-testpipeline-copy") shutil.copytree(self.test_pipeline_dir, new_pipeline) return new_pipeline @@ -70,7 +72,7 @@ def test_init_pipeline_lint(self): assert "version_consistency" in lint_obj.lint_tests # Tests that parent nf_core.utils.Pipeline class __init__() is working to find git hash - assert len(lint_obj.git_sha) > 0 + assert len(lint_obj.git_sha or "") > 0 def test_load_lint_config_not_found(self): """Try to load a linting config file that doesn't exist""" @@ -86,7 +88,7 @@ def test_load_lint_config_ignore_all_tests(self): # Make a config file listing all test names config_dict = {"lint": {test_name: False for test_name in lint_obj.lint_tests}} - with open(os.path.join(new_pipeline, ".nf-core.yml"), "w") as fh: + with open(Path(new_pipeline, ".nf-core.yml"), "w") as fh: yaml.dump(config_dict, fh) # Load the new lint config file and check @@ -130,7 +132,7 @@ def test_json_output(self, tmp_dir): self.lint_obj.warned.append(("test_three", "This test gave a warning")) # Make a temp dir for the JSON output - json_fn = os.path.join(tmp_dir, "lint_results.json") + json_fn = Path(tmp_dir, "lint_results.json") self.lint_obj._save_json_results(json_fn) # Load created JSON file and check its contents @@ -156,25 +158,23 @@ def test_sphinx_md_files(self): """Check that we have .md files for all lint module code, and that there are no unexpected files (eg. 
deleted lint tests)""" - docs_basedir = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "docs", "api", "_src", "pipeline_lint_tests" - ) + docs_basedir = Path(Path(__file__).resolve().parent.parent.parent, "docs", "api", "_src", "pipeline_lint_tests") # Get list of existing .md files - existing_docs = [] - for fn in os.listdir(docs_basedir): - if fnmatch.fnmatch(fn, "*.md") and not fnmatch.fnmatch(fn, "index.md"): - existing_docs.append(os.path.join(docs_basedir, fn)) + existing_docs = list(docs_basedir.glob("*.md")) + existing_docs = [fn for fn in existing_docs if fn.name != "index.md"] # Check .md files against each test name lint_obj = nf_core.lint.PipelineLint("", True) for test_name in lint_obj.lint_tests: - fn = os.path.join(docs_basedir, f"{test_name}.md") - assert os.path.exists(fn), f"Could not find lint docs .md file: {fn}" + fn = docs_basedir / f"{test_name}.md" + assert fn.exists(), f"Could not find lint docs .md file: {fn}" existing_docs.remove(fn) # Check that we have no remaining .md files that we didn't expect - assert len(existing_docs) == 0, f"Unexpected lint docs .md files found: {', '.join(existing_docs)}" + assert ( + len(existing_docs) == 0 + ), f"Unexpected lint docs .md files found: {', '.join(str(fn) for fn in existing_docs)}" ####################### # SPECIFIC LINT TESTS # @@ -385,7 +385,7 @@ def test_sphinx_md_files(self): # """ Tests the conda environment config checks with a working example """ # lint_obj = nf_core.lint.PipelineLint(PATH_WORKING_EXAMPLE) # lint_obj.files = ["environment.yml"] -# with open(os.path.join(PATH_WORKING_EXAMPLE, "environment.yml"), "r") as fh: +# with open(Path(PATH_WORKING_EXAMPLE, "environment.yml"), "r") as fh: # lint_obj.conda_config = yaml.safe_load(fh) # lint_obj.pipeline_name = "tools" # lint_obj.config["manifest.version"] = "0.4" @@ -397,7 +397,7 @@ def test_sphinx_md_files(self): # """ Tests the conda environment config fails with a bad example """ # lint_obj = nf_core.lint.PipelineLint(PATH_WORKING_EXAMPLE) # lint_obj.files = ["environment.yml"] -# with open(os.path.join(PATH_WORKING_EXAMPLE, "environment.yml"), "r") as fh: +# with open(Path(PATH_WORKING_EXAMPLE, "environment.yml"), "r") as fh: # lint_obj.conda_config = yaml.safe_load(fh) # lint_obj.conda_config["dependencies"] = ["fastqc", "multiqc=0.9", "notapackaage=0.4"] # lint_obj.pipeline_name = "not_tools" @@ -429,7 +429,7 @@ def test_sphinx_md_files(self): # lint_obj = nf_core.lint.PipelineLint(PATH_WORKING_EXAMPLE) # lint_obj.version = "1.11" # lint_obj.files = ["environment.yml", "Dockerfile"] -# with open(os.path.join(PATH_WORKING_EXAMPLE, "Dockerfile"), "r") as fh: +# with open(Path(PATH_WORKING_EXAMPLE, "Dockerfile"), "r") as fh: # lint_obj.dockerfile = fh.read().splitlines() # lint_obj.conda_config["name"] = "nf-core-tools-0.4" # lint_obj.check_conda_dockerfile() From 055117342db6626d25916ce62d15ab56be596a87 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:17:59 +0100 Subject: [PATCH 21/65] add empty ro-crate after sync --- .github/workflows/create-lint-wf.yml | 7 +++++-- .github/workflows/create-test-lint-wf-template.yml | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-lint-wf.yml b/.github/workflows/create-lint-wf.yml index 26cbcd64e9..2282f4fc15 100644 --- a/.github/workflows/create-lint-wf.yml +++ b/.github/workflows/create-lint-wf.yml @@ -60,8 +60,6 @@ jobs: mkdir create-lint-wf && cd create-lint-wf export NXF_WORK=$(pwd) nf-core --log-file log.txt create -n 
testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --plain - # fake ro-crate - touch nf-core-testpipeline/ro-crate-metadata.json # Try syncing it before we change anything - name: nf-core sync @@ -93,6 +91,11 @@ jobs: run: find nf-core-testpipeline -type f -exec sed -i 's/zenodo.XXXXXX/zenodo.123456/g' {} \; working-directory: create-lint-wf + # Add empty ro-crate file + - name: add empty ro-crate file + run: touch nf-core-testpipeline/ro-crate-metadata.json + working-directory: create-lint-wf + # Run nf-core linting - name: nf-core lint run: nf-core --verbose --log-file log.txt --hide-progress lint --dir nf-core-testpipeline --fail-ignored --fail-warned diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 5eac6a1db0..3babd91db7 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -94,6 +94,7 @@ jobs: nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} # fake ro-crate touch my-prefix-testpipeline/ro-crate-metadata.json + git commit -am "add ro-crate" - name: run the pipeline run: | From 59767c31392cbc855578d1ae2e83891c992b4961 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:21:20 +0100 Subject: [PATCH 22/65] fix tests --- nf_core/lint/files_exist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index 1efba0fbc9..29a182f0b0 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -237,7 +237,7 @@ def pf(file_path): for file_hint in files_hint: if file_hint[0] == files: hint = file_hint[1] - warned.append(f"File not found: {self._wrap_quotes(files)} {hint}") + warned.append(f"File not found: {self._wrap_quotes(files)}{hint}") # Files that cause an error if they exist for file in files_fail_ifexists: From 440168ced984c00ce008db05f43d032f674556c9 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:25:36 +0100 Subject: [PATCH 23/65] fix linting tests --- .github/workflows/create-test-lint-wf-template.yml | 8 +++++--- tests/test_lint.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 3babd91db7..91f54b1628 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -92,9 +92,6 @@ jobs: run: | cd create-test-lint-wf nf-core --log-file log.txt create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml ${{ matrix.TEMPLATE }} - # fake ro-crate - touch my-prefix-testpipeline/ro-crate-metadata.json - git commit -am "add ro-crate" - name: run the pipeline run: | @@ -125,6 +122,11 @@ jobs: run: find my-prefix-testpipeline -type f -exec sed -i 's/zenodo.XXXXXX/zenodo.123456/g' {} \; working-directory: create-test-lint-wf + # Add empty ro-crate file + - name: add empty ro-crate file + run: touch my-prefix-testpipeline/ro-crate-metadata.json + working-directory: create-test-lint-wf + # Run nf-core linting - name: nf-core lint run: nf-core --log-file log.txt --hide-progress lint --dir my-prefix-testpipeline --fail-warned diff --git a/tests/test_lint.py b/tests/test_lint.py index 172f63ba64..f89948d321 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -158,7 +158,7 @@ def 
test_sphinx_md_files(self): """Check that we have .md files for all lint module code, and that there are no unexpected files (eg. deleted lint tests)""" - docs_basedir = Path(Path(__file__).resolve().parent.parent.parent, "docs", "api", "_src", "pipeline_lint_tests") + docs_basedir = Path(Path(__file__).resolve().parent.parent, "docs", "api", "_src", "pipeline_lint_tests") # Get list of existing .md files existing_docs = list(docs_basedir.glob("*.md")) From d0e03b13d873c2c2eb7fcbc6da52383face4160d Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:32:44 +0100 Subject: [PATCH 24/65] add linting test for new hint --- tests/lint/files_exist.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/lint/files_exist.py b/tests/lint/files_exist.py index 4e5e4d3c2b..d382b68605 100644 --- a/tests/lint/files_exist.py +++ b/tests/lint/files_exist.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path import nf_core.lint @@ -7,7 +7,7 @@ def test_files_exist_missing_config(self): """Lint test: critical files missing FAIL""" new_pipeline = self._make_pipeline_copy() - os.remove(os.path.join(new_pipeline, "CHANGELOG.md")) + Path(new_pipeline, "CHANGELOG.md").unlink() lint_obj = nf_core.lint.PipelineLint(new_pipeline) lint_obj._load() @@ -21,7 +21,7 @@ def test_files_exist_missing_main(self): """Check if missing main issues warning""" new_pipeline = self._make_pipeline_copy() - os.remove(os.path.join(new_pipeline, "main.nf")) + Path(new_pipeline, "main.nf").unlink() lint_obj = nf_core.lint.PipelineLint(new_pipeline) lint_obj._load() @@ -34,8 +34,8 @@ def test_files_exist_depreciated_file(self): """Check whether depreciated file issues warning""" new_pipeline = self._make_pipeline_copy() - nf = os.path.join(new_pipeline, "parameters.settings.json") - os.system(f"touch {nf}") + nf = Path(new_pipeline, "parameters.settings.json") + nf.touch() lint_obj = nf_core.lint.PipelineLint(new_pipeline) lint_obj._load() @@ -53,3 +53,18 @@ def test_files_exist_pass(self): results = lint_obj.files_exist() assert results["failed"] == [] + + +def test_files_exist_hint(self): + """Check if hint is added to missing crate file""" + new_pipeline = self._make_pipeline_copy() + + Path(new_pipeline, "ro-crate-metadata.json").unlink() + + lint_obj = nf_core.lint.PipelineLint(new_pipeline) + lint_obj._load() + + results = lint_obj.files_exist() + assert results["warned"] == [ + "File not found: `ro-crate-metadata.json`. Run `nf-core rocrate` to generate this file. Read more about RO-Crates in the [nf-core/tools docs](https://nf-co.re/tools#create-a-ro-crate-metadata-file)." 
+ ] From eae9a9724f5fc3a7c43fad38325e4246fc109753 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 14:01:16 +0200 Subject: [PATCH 25/65] fix tests --- nf_core/commands_pipelines.py | 2 +- nf_core/pipelines/lint_utils.py | 5 +++-- nf_core/{ => pipelines}/rocrate.py | 3 +-- tests/test_rocrate.py | 15 +++++++-------- 4 files changed, 12 insertions(+), 13 deletions(-) rename nf_core/{ => pipelines}/rocrate.py (99%) diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index a3c490372e..4f8c9d6df3 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -280,7 +280,7 @@ def pipelines_list(ctx, keywords, sort, json, show_archived): # nf-core pipelines ro-crate def pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) -> None: - from nf_core.rocrate import ROCrate + from nf_core.pipelines.rocrate import ROCrate if json_path is None and zip_path is None: log.error("Either `--json_path` or `--zip_path` must be specified.") diff --git a/nf_core/pipelines/lint_utils.py b/nf_core/pipelines/lint_utils.py index 4ccf790076..615d94a93c 100644 --- a/nf_core/pipelines/lint_utils.py +++ b/nf_core/pipelines/lint_utils.py @@ -2,9 +2,10 @@ import logging import subprocess from pathlib import Path -from typing import List +from typing import List, Union import rich +import rich.box from rich.console import Console from rich.table import Table @@ -112,7 +113,7 @@ def ignore_file(lint_name: str, file_path: Path, dir_path: Path) -> List[List[st ignored: List[str] = [] _, lint_conf = nf_core.utils.load_tools_config(dir_path) lint_conf = lint_conf.get("lint", {}) - ignore_entry: List[str] | bool = lint_conf.get(lint_name, []) + ignore_entry: Union[List[str], bool] = lint_conf.get(lint_name, []) full_path = dir_path / file_path # Return a failed status if we can't find the file if not full_path.is_file(): diff --git a/nf_core/rocrate.py b/nf_core/pipelines/rocrate.py similarity index 99% rename from nf_core/rocrate.py rename to nf_core/pipelines/rocrate.py index 3857460d0a..dc64fcb016 100644 --- a/nf_core/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -""" Code to deal with pipeline RO (Research Object) Crates """ - +"""Code to deal with pipeline RO (Research Object) Crates""" import logging import sys diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index 83911eab6e..fe3cf2e8e0 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -1,5 +1,4 @@ -""" Test the nf-core rocrate command """ - +"""Test the nf-core pipelines rocrate command""" import shutil import tempfile @@ -9,8 +8,9 @@ import rocrate.rocrate from git import Repo -import nf_core.create -import nf_core.rocrate +import nf_core.pipelines.create +import nf_core.pipelines.create.create +import nf_core.pipelines.rocrate import nf_core.utils @@ -25,15 +25,14 @@ def setUp(self): self.tmp_dir = Path(tempfile.mkdtemp()) self.test_pipeline_dir = Path(self.tmp_dir, "nf-core-testpipeline") - self.create_obj = nf_core.create.PipelineCreate( + self.create_obj = nf_core.pipelines.create.create.PipelineCreate( name="testpipeline", description="This is a test pipeline", author="Test McTestFace", - outdir=self.test_pipeline_dir, + outdir=str(self.test_pipeline_dir), version="1.0.0", no_git=False, force=True, - plain=True, ) self.create_obj.init_pipeline() @@ -54,7 +53,7 @@ def test_rocrate_creation(self): """Run the nf-core rocrate command""" # Run the command - self.rocrate_obj = nf_core.rocrate.ROCrate(self.test_pipeline_dir) + 
self.rocrate_obj = nf_core.pipelines.rocrate.ROCrate(self.test_pipeline_dir) self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) # Check that the crate was created From 76b6d67f83103bf4e4aae75ac53e06e5811bb691 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 14:04:51 +0200 Subject: [PATCH 26/65] fix ci test --- .github/actions/create-lint-wf/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/create-lint-wf/action.yml b/.github/actions/create-lint-wf/action.yml index dc5acbff9a..bb6a213103 100644 --- a/.github/actions/create-lint-wf/action.yml +++ b/.github/actions/create-lint-wf/action.yml @@ -61,6 +61,7 @@ runs: # Add empty ro-crate file - name: add empty ro-crate file + shell: bash run: touch nf-core-testpipeline/ro-crate-metadata.json working-directory: create-lint-wf From 12529a5e0388bd02840a8c463cb56cb448d7759a Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 17:54:22 +0200 Subject: [PATCH 27/65] use github to guess author name, set names as ids if no orcid --- nf_core/__main__.py | 7 ++++--- nf_core/pipelines/rocrate.py | 40 +++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 0b3e61a961..4eabf7bacc 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -580,8 +580,8 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): pipelines_list(ctx, keywords, sort, json, show_archived) -# nf-core ro-crate -@nf_core_cli.command("ro-crate") +# nf-core pipelines ro-crate +@pipelines.command("ro-crate") @click.argument( "pipeline_dir", type=click.Path(exists=True), @@ -603,7 +603,8 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): type=str, help="Version of pipeline to use for RO Crate", ) -def rocrate(ctx, pipeline_dir, json_path, zip_path, pipeline_version): +@click.pass_context +def ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version): """ Make an Research Object Crate """ diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index dc64fcb016..259202d0de 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -4,8 +4,10 @@ import logging import sys import tempfile +from datetime import datetime from pathlib import Path -from typing import Union +from typing import Set, Union, cast +from urllib.parse import quote import requests import rocrate.model.entity @@ -204,7 +206,7 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") wf_file.append_to("keywords", topics) - def add_main_authors(self, wf_file): + def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate """ @@ -219,34 +221,45 @@ def add_main_authors(self, wf_file): return # look at git contributors for author names try: - contributors = set() + contributors: Set[str] = set() commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: - contributors.add(commit.author.name) + if commit.author.name is not None: + contributors.add(commit.author.name) # exclude bots - contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] - # remove usernames (just keep names with spaces) - contributors = [c for c in contributors if " " in c] + contributors = {c for c in contributors if not c.endswith("bot") and c != "Travis CI User"} log.debug(f"Found {len(contributors)} git 
authors") for git_author in contributors: + git_author = requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) + if git_author is None: + log.debug(f"Could not find name for {git_author}") + continue + if git_author not in authors: authors.append(git_author) except AttributeError: log.debug("Could not find git authors") + # remove usernames (just keep names with spaces) + authors = [c for c in authors if " " in c] + for author in authors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) - author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) - wf_file.append_to("author", author_entitity) + author_entitity = self.crate.add( + Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) + ) + wf_file.append_to("creator", author_entitity) def add_workflow_files(self): """ Add workflow files to the RO Crate """ + import re + import nf_core.utils wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) @@ -265,8 +278,15 @@ def add_workflow_files(self): if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) - if "metro_map" in fn: + if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: log.info(f"Setting main entity image to: {fn}") + # check if image is set in main entity + if self.crate.mainEntity.get("image"): + log.info( + f"Main entity already has an image: {self.crate.mainEntity.get('image')}, replacing it with: {fn}" + ) + else: + log.info(f"Setting main entity image to: {fn}") self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): From ff9df166981f72e4d48f4dab2a2108f730b88717 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 17:55:47 +0200 Subject: [PATCH 28/65] add bioschemas, datecreated and datemodified (only set to current time atm) --- nf_core/pipelines/rocrate.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 259202d0de..1bf503c25e 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -24,13 +24,16 @@ class ROCrate: """ Class to generate an RO Crate for a pipeline - Args: - pipeline_dir (Path): Path to the pipeline directory - version (str): Version of the pipeline to use - """ - def __init__(self, pipeline_dir: Path, version=""): + def __init__(self, pipeline_dir: Path, version="") -> None: + """ + Initialise the ROCrate object + + Args: + pipeline_dir (Path): Path to the pipeline directory + version (str): Version of the pipeline to checkout + """ from nf_core.utils import is_pipeline_directory, setup_requests_cachedir is_pipeline_directory(pipeline_dir) @@ -146,9 +149,6 @@ def make_workflow_ro_crate(self) -> None: } ) - # Set main entity file - self.set_main_entity("main.nf") - # add readme as description readme = Path("README.md") @@ -177,6 +177,9 @@ def make_workflow_ro_crate(self) -> None: else: self.crate.CreativeWorkStatus = "Stable" + # Set main entity file + self.set_main_entity("main.nf") + # Add all other files self.add_workflow_files() @@ -184,16 +187,21 @@ def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_jsonld( { "@id": main_entity_filename, "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], }, - ) + ) # 
FIXME: this adds "#main.nf" to the crate, but it should be "main.nf"
+        wf_file = cast(rocrate.model.entity.Entity, wf_file)  # ro-crate is untyped so need to cast type manually
         self.crate.mainEntity = wf_file
         self.add_main_authors(wf_file)
         wf_file.append_to("programmingLanguage", {"@id": "#nextflow"})
+        wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/")
+        # add dateCreated and dateModified, based on the current data
+        wf_file.append_to("dateCreated", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True)
+        wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True)
+
         # get keywords from nf-core website
         remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"]
         # go through all remote workflows and find the one that matches the pipeline name

From 9f7dccd2caa604d5e53999689e1c6f1d2398e876 Mon Sep 17 00:00:00 2001
From: mashehu
Date: Mon, 5 Aug 2024 07:58:47 +0200
Subject: [PATCH 29/65] follow `ComputationalWorkflow` schema for main entity.

---
 nf_core/__main__.py           |   9 ++-
 nf_core/commands_pipelines.py |   9 ++-
 nf_core/pipelines/rocrate.py  | 134 +++++++++++++++++++++++++---------
 3 files changed, 115 insertions(+), 37 deletions(-)

diff --git a/nf_core/__main__.py b/nf_core/__main__.py
index 4eabf7bacc..45c0a84901 100644
--- a/nf_core/__main__.py
+++ b/nf_core/__main__.py
@@ -602,9 +602,16 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived):
     "--pipeline_version",
     type=str,
     help="Version of pipeline to use for RO Crate",
+    default="",
 )
 @click.pass_context
-def ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version):
+def ro_crate(
+    ctx,
+    pipeline_dir: str,
+    json_path: str,
+    zip_path: str,
+    pipeline_version: str,
+):
     """
     Make a Research Object Crate
     """
diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py
index 4f8c9d6df3..c69cfa5668 100644
--- a/nf_core/commands_pipelines.py
+++ b/nf_core/commands_pipelines.py
@@ -2,6 +2,7 @@
 import os
 import sys
 from pathlib import Path
+from typing import Optional, Union
 
 import rich
 
@@ -279,7 +280,13 @@ def pipelines_list(ctx, keywords, sort, json, show_archived):
 
 
 # nf-core pipelines ro-crate
-def pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) -> None:
+def pipelines_ro_crate(
+    ctx,
+    pipeline_dir: Union[str, Path],
+    json_path: Optional[Union[str, Path]],
+    zip_path: Optional[Union[str, Path]],
+    pipeline_version: str,
+) -> None:
     from nf_core.pipelines.rocrate import ROCrate
 
     if json_path is None and zip_path is None:
diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py
index 1bf503c25e..343b7e9683 100644
--- a/nf_core/pipelines/rocrate.py
+++ b/nf_core/pipelines/rocrate.py
@@ -2,19 +2,22 @@
 """Code to deal with pipeline RO (Research Object) Crates"""
 
 import logging
+import os
 import sys
 import tempfile
 from datetime import datetime
 from pathlib import Path
-from typing import Set, Union, cast
+from typing import Dict, List, Optional, Set, Union, cast
 from urllib.parse import quote
 
 import requests
 import rocrate.model.entity
 import rocrate.rocrate
 from git import GitCommandError, InvalidGitRepositoryError
+from rich.progress import BarColumn, Progress
 from rocrate.model.person import Person
 
+from nf_core.pipelines.schema import PipelineSchema
 from nf_core.utils import Pipeline
 
 log = logging.getLogger(__name__)
@@ -39,10 +42,14 @@ def __init__(self, pipeline_dir: Path, version="") -> None:
is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir - self.version = version + self.version: str = version self.crate: rocrate.rocrate.ROCrate self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() + self.pipeline_obj.schema_obj = PipelineSchema() + # Assume we're in a pipeline dir root if schema path not set + self.pipeline_obj.schema_obj.get_schema_path(self.pipeline_dir) + self.pipeline_obj.schema_obj.load_schema() setup_requests_cachedir() @@ -72,7 +79,7 @@ def create_ro_crate( os.chdir(self.pipeline_dir) # Check that the checkout pipeline version is the same as the requested version - if self.version: + if self.version != "": if self.version != self.pipeline_obj.nf_config.get("manifest.version"): # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") @@ -86,7 +93,7 @@ def create_ro_crate( except GitCommandError: log.error(f"Could not checkout version {self.version}") sys.exit(1) - + self.version = self.pipeline_obj.nf_config.get("manifest.version", "") self.make_workflow_ro_crate() # Save just the JSON metadata file @@ -169,38 +176,67 @@ def make_workflow_ro_crate(self) -> None: except FileNotFoundError: log.error(f"Could not find LICENSE file in {self.pipeline_dir}") - # add doi as identifier - self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' + self.crate.name = self.pipeline_obj.nf_config.get("manifest.name") + + self.crate.root_dataset.append_to("version", self.version, compact=True) - if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): + if "dev" in self.version: self.crate.CreativeWorkStatus = "InProgress" else: self.crate.CreativeWorkStatus = "Stable" + tags = self.pipeline_obj.repo.tags + if tags: + # get the tag for this version + for tag in tags: + if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: + self.crate.root_dataset.append_to( + "dateCreated", tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), compact=True + ) - # Set main entity file - self.set_main_entity("main.nf") + self.crate.add_jsonld( + {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} + ) # Add all other files self.add_workflow_files() + # Set main entity file + self.set_main_entity("main.nf") + def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_jsonld( - { - "@id": main_entity_filename, - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - }, - ) # FIXME: this adds "#main.nf" to the crate, but it should be "main.nf" + wf_file = self.crate.add_file( + main_entity_filename, + properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + ) wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - self.crate.mainEntity = wf_file - self.add_main_authors(wf_file) + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + wf_file.append_to("dateCreated", self.crate.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + 
wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + if self.pipeline_obj.schema_obj is not None: + log.debug("input value") + + schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + "input" + ] + input_value: Dict[str, Union[str, List[str], bool]] = { + "@type": ["PropertyValueSpecification", "FormalParameter"], + "default": schema_input.get("default", ""), + "encodingFormat": schema_input.get("mimetype", ""), + "valueRequired": "input" + in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + } + wf_file.append_to( + "input", + input_value, + ) # get keywords from nf-core website remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] @@ -214,6 +250,18 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") wf_file.append_to("keywords", topics) + self.add_main_authors(wf_file) + + self.crate.mainEntity = wf_file + + wf_file.append_to("license", self.crate.license) + wf_file.append_to("name", self.crate.name) + + self.crate.add_file( + main_entity_filename, + properties=wf_file.properties(), + ) + def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate @@ -224,43 +272,61 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: authors = self.pipeline_obj.nf_config["manifest.author"].split(",") # remove spaces authors = [a.strip() for a in authors] + # add manifest authors as maintainer to crate + except KeyError: log.error("No author field found in manifest of nextflow.config") return # look at git contributors for author names try: - contributors: Set[str] = set() + git_contributors: Set[str] = set() commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: if commit.author.name is not None: - contributors.add(commit.author.name) + git_contributors.add(commit.author.name) # exclude bots - contributors = {c for c in contributors if not c.endswith("bot") and c != "Travis CI User"} + contributors = {c for c in git_contributors if not c.endswith("bot") and c != "Travis CI User"} log.debug(f"Found {len(contributors)} git authors") - for git_author in contributors: - git_author = requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) - if git_author is None: - log.debug(f"Could not find name for {git_author}") - continue - - if git_author not in authors: - authors.append(git_author) + + progress_bar = Progress( + "[bold blue]{task.description}", + BarColumn(bar_width=None), + "[magenta]{task.completed} of {task.total}[reset] » [bold yellow]{task.fields[test_name]}", + transient=True, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with progress_bar: + bump_progress = progress_bar.add_task( + "Searching for author names on GitHub", total=len(contributors), test_name="" + ) + + for git_author in contributors: + progress_bar.update(bump_progress, advance=1, test_name=git_author) + git_author = ( + requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) + ) + if git_author is None: + log.debug(f"Could not find name for {git_author}") + continue + except AttributeError: - log.debug("Could not find git authors") + log.debug("Could not find git contributors") # remove usernames (just keep names with spaces) - authors = [c 
for c in authors if " " in c] + named_contributors = {c for c in contributors if " " in c} - for author in authors: + for author in named_contributors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) author_entitity = self.crate.add( Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) ) wf_file.append_to("creator", author_entitity) + if author in authors: + wf_file.append_to("maintainer", author_entitity) def add_workflow_files(self): """ @@ -309,8 +375,6 @@ def add_workflow_files(self): def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" - path = Path(path) - if path.is_dir(): self.pipeline_dir = path # wf_crate_filename = path / "ro-crate-metadata.json" @@ -323,7 +387,7 @@ def set_crate_paths(self, path: Path) -> None: raise OSError(f"Could not find pipeline '{path}'") -def get_orcid(name: str) -> Union[str, None]: +def get_orcid(name: str) -> Optional[str]: """ Get the ORCID for a given name From fd54944517671c82f8f7122ae966fce3c6025c7e Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 13:59:10 +0200 Subject: [PATCH 30/65] fix date created field --- nf_core/pipelines/rocrate.py | 17 ++++------------- tests/test_rocrate.py | 4 ++-- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 343b7e9683..c71bf572a8 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -98,9 +98,9 @@ def create_ro_crate( # Save just the JSON metadata file if metadata_path is not None: - log.info(f"Saving metadata file '{metadata_path}'") + log.info(f"Saving metadata file to '{metadata_path}'") # Save the crate to a temporary directory - tmpdir = Path(tempfile.mkdtemp(), "wf") + tmpdir = Path(tempfile.TemporaryDirectory().name) self.crate.write(tmpdir) # Now save just the JSON file crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") @@ -199,7 +199,6 @@ def make_workflow_ro_crate(self) -> None: # Add all other files self.add_workflow_files() - # Set main entity file self.set_main_entity("main.nf") @@ -216,7 +215,7 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", self.crate.get("dateCreated", ""), compact=True) + wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) if self.pipeline_obj.schema_obj is not None: @@ -257,11 +256,6 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("license", self.crate.license) wf_file.append_to("name", self.crate.name) - self.crate.add_file( - main_entity_filename, - properties=wf_file.properties(), - ) - def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate @@ -338,12 +332,9 @@ def add_workflow_files(self): wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) # exclude github action files - wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] + wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/") and not fn == "main.nf"] log.debug(f"Adding 
{len(wf_filenames)} workflow files") for fn in wf_filenames: - # skip main.nf - if fn == "main.nf": - continue # add nextflow language to .nf and .config files if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): log.debug(f"Adding workflow file: {fn}") diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index fe3cf2e8e0..eac61fac48 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -67,8 +67,8 @@ def test_rocrate_creation(self): for entity in entities: entity_json = entity.as_jsonld() if entity_json["@id"] == "./": - self.assertEqual(entity_json.get("name"), "Research Object Crate for nf-core/testpipeline") - self.assertEqual(entity_json["mainEntity"], {"@id": "#main.nf"}) + self.assertEqual(entity_json.get("name"), "nf-core/testpipeline") + self.assertEqual(entity_json["mainEntity"], {"@id": "main.nf"}) elif entity_json["@id"] == "#main.nf": self.assertEqual(entity_json["programmingLanguage"], [{"@id": "#nextflow"}]) self.assertEqual(entity_json["image"], [{"@id": "nf-core-testpipeline_metro_map.png"}]) From b875e7e2b79feb652ae323e26ce7db257f40ff6a Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 14:43:12 +0200 Subject: [PATCH 31/65] add "about" field to workflow diagram --- nf_core/pipelines/rocrate.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index c71bf572a8..c559fa3bc7 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -197,10 +197,10 @@ def make_workflow_ro_crate(self) -> None: {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} ) - # Add all other files - self.add_workflow_files() # Set main entity file self.set_main_entity("main.nf") + # Add all other files + self.add_workflow_files() def set_main_entity(self, main_entity_filename: str): """ @@ -342,7 +342,7 @@ def add_workflow_files(self): continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") - self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) + self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: log.info(f"Setting main entity image to: {fn}") # check if image is set in main entity @@ -352,6 +352,7 @@ def add_workflow_files(self): ) else: log.info(f"Setting main entity image to: {fn}") + self.crate.update_jsonld({"@id": "#" + fn, "about": {"@id": self.crate.mainEntity.id}}) self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): From c304ab20a71923914ec42b4e3cbfc1b2027b8dc4 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 15:58:16 +0200 Subject: [PATCH 32/65] add input as separate entity --- nf_core/pipelines/rocrate.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index c559fa3bc7..8d7fd1750c 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -218,6 +218,11 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + if self.version.endswith("dev"): + url = "dev" + else: + url = self.version + wf_file.append_to("url", 
{"@id": f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/"}) if self.pipeline_obj.schema_obj is not None: log.debug("input value") @@ -225,6 +230,7 @@ def set_main_entity(self, main_entity_filename: str): "input" ] input_value: Dict[str, Union[str, List[str], bool]] = { + "@id": "#input", "@type": ["PropertyValueSpecification", "FormalParameter"], "default": schema_input.get("default", ""), "encodingFormat": schema_input.get("mimetype", ""), @@ -232,9 +238,10 @@ def set_main_entity(self, main_entity_filename: str): in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", } + self.crate.add_jsonld(input_value) wf_file.append_to( "input", - input_value, + {"@id": "#input"}, ) # get keywords from nf-core website @@ -336,10 +343,10 @@ def add_workflow_files(self): log.debug(f"Adding {len(wf_filenames)} workflow files") for fn in wf_filenames: # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - continue + # if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test") and not fn.endswith("main.nf"): + # log.debug(f"Adding workflow file: {fn}") + # self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + # continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) @@ -359,10 +366,10 @@ def add_workflow_files(self): log.debug(f"Adding workflow file: {fn}") self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) continue - else: - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn) - continue + # else: + # log.debug(f"Adding workflow file: {fn}") + # self.crate.add_file(fn) + # continue def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" From ba3bd833c1628f6c12fc6f4a82b0d9e9ab6e9d41 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 16:33:50 +0200 Subject: [PATCH 33/65] add version to main entity --- nf_core/pipelines/rocrate.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 8d7fd1750c..0bb573da81 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -212,17 +212,20 @@ def set_main_entity(self, main_entity_filename: str): ) wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) - wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) + wf_file.append_to( + "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + ) # add dateCreated and dateModified, based on the current data wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) if 
self.version.endswith("dev"): url = "dev" else: url = self.version - wf_file.append_to("url", {"@id": f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/"}) + wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) + wf_file.append_to("version", self.version, compact=True) if self.pipeline_obj.schema_obj is not None: log.debug("input value") @@ -351,7 +354,6 @@ def add_workflow_files(self): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: - log.info(f"Setting main entity image to: {fn}") # check if image is set in main entity if self.crate.mainEntity.get("image"): log.info( From ec7a334f3ea4d927784f5718deef480973a1b5b9 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 16:47:58 +0200 Subject: [PATCH 34/65] update snapshot --- tests/__snapshots__/test_create_app.ambr | 789 +---------------------- 1 file changed, 12 insertions(+), 777 deletions(-) diff --git a/tests/__snapshots__/test_create_app.ambr b/tests/__snapshots__/test_create_app.ambr index 2ad0772587..158dac0d2e 100644 --- a/tests/__snapshots__/test_create_app.ambr +++ b/tests/__snapshots__/test_create_app.ambr @@ -1382,567 +1382,9 @@ ''' # --- -# name: test_github_details - ''' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nf-core create - - - - - - - - - - nf-core create — Create a new pipeline with the nf-core pipeline template - - - Create GitHub repository - -   Now that we have created a new pipeline locally, we can create a new GitHub repository and push    -   the code to it. - - - - - Your GitHub usernameYour GitHub personal access token▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - for login. Show  - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - GitHub username••••••••••••                   - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - The name of the organisation where the The name of the new GitHub repository - GitHub repo will be cretaed - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - nf-core                               mypipeline                             - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - ⚠️ You can't create a repository directly in the nf-core organisation. - Please create the pipeline repo to an organisation where you have access or use your user  - account. A core-team member will be able to transfer the repo to nf-core once the development - has started. - - 💡 Your GitHub user account will be used by default if nf-core is given as the org name. - - - ▔▔▔▔▔▔▔▔Private - Select to make the new GitHub repo private. 
- ▁▁▁▁▁▁▁▁ - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  Back  Create GitHub repo  Finish without creating a repo  - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - - - - - - -  d Toggle dark mode  q Quit  - - - - - ''' -# --- -# name: test_github_exit_message - ''' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nf-core create - - - - - - - - - - nf-core create — Create a new pipeline with the nf-core pipeline template - - - HowTo create a GitHub repository - - - -                                           ,--./,-. -           ___     __   __   __   ___     /,-._.--~\  -     |\ | |__  __ /  ` /  \ |__) |__         }  { -     | \| |       \__, \__/ |  \ |___     \`-._,-`-, -                                           `._,._,' - -   If you would like to create the GitHub repository later, you can do it manually by following  -   these steps: - -  1. Create a new GitHub repository -  2. Add the remote to your local repository: - - - cd <pipeline_directory> - git remote add origin git@github.com:<username>/<repo_name>.git - - -  3. Push the code to the remote: - - - git push --all origin - - - 💡 Note the --all flag: this is needed to push all branches to the remote. - - - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  Close  - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - - - - - - - - - -  d Toggle dark mode  q Quit  - - - - - ''' -# --- # name: test_github_question ''' - + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - nf-core create + nf-core create - - - - nf-core create — Create a new pipeline with the nf-core pipeline template - - - Create GitHub repository - - -   After creating the pipeline template locally, we can create a GitHub repository and push the  -   code to it. - -   Do you want to create a GitHub repository? 
- - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  Create GitHub repo  Finish without creating a repo  - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  d Toggle dark mode  q Quit  + + + + From 501aa74d0ad253c0876658c84f3b41f1c6b54f51 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 16:53:00 +0200 Subject: [PATCH 35/65] update snapshot with py 3.12 --- tests/__snapshots__/test_create_app.ambr | 255 ++++++++++++----------- 1 file changed, 128 insertions(+), 127 deletions(-) diff --git a/tests/__snapshots__/test_create_app.ambr b/tests/__snapshots__/test_create_app.ambr index 158dac0d2e..d1fe59bf2d 100644 --- a/tests/__snapshots__/test_create_app.ambr +++ b/tests/__snapshots__/test_create_app.ambr @@ -2022,255 +2022,256 @@ font-weight: 700; } - .terminal-2071915686-matrix { + .terminal-4126010156-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-2071915686-title { + .terminal-4126010156-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-2071915686-r1 { fill: #c5c8c6 } - .terminal-2071915686-r2 { fill: #e3e3e3 } - .terminal-2071915686-r3 { fill: #989898 } - .terminal-2071915686-r4 { fill: #e1e1e1 } - .terminal-2071915686-r5 { fill: #4ebf71;font-weight: bold } - .terminal-2071915686-r6 { fill: #a5a5a5;font-style: italic; } - .terminal-2071915686-r7 { fill: #1e1e1e } - .terminal-2071915686-r8 { fill: #0f4e2a } - .terminal-2071915686-r9 { fill: #7b3042 } - .terminal-2071915686-r10 { fill: #a7a7a7 } - .terminal-2071915686-r11 { fill: #787878 } - .terminal-2071915686-r12 { fill: #e2e2e2 } - .terminal-2071915686-r13 { fill: #b93c5b } - .terminal-2071915686-r14 { fill: #454a50 } - .terminal-2071915686-r15 { fill: #7ae998 } - .terminal-2071915686-r16 { fill: #e2e3e3;font-weight: bold } - .terminal-2071915686-r17 { fill: #000000 } - .terminal-2071915686-r18 { fill: #008139 } - .terminal-2071915686-r19 { fill: #fea62b;font-weight: bold } - .terminal-2071915686-r20 { fill: #a7a9ab } - .terminal-2071915686-r21 { fill: #e2e3e3 } + .terminal-4126010156-r1 { fill: #c5c8c6 } + .terminal-4126010156-r2 { fill: #e3e3e3 } + .terminal-4126010156-r3 { fill: #989898 } + .terminal-4126010156-r4 { fill: #e1e1e1 } + .terminal-4126010156-r5 { fill: #4ebf71;font-weight: bold } + .terminal-4126010156-r6 { fill: #a5a5a5;font-style: italic; } + .terminal-4126010156-r7 { fill: #1e1e1e } + .terminal-4126010156-r8 { fill: #0f4e2a } + .terminal-4126010156-r9 { fill: #7b3042 } + .terminal-4126010156-r10 { fill: #a7a7a7 } + .terminal-4126010156-r11 { fill: #787878 } + .terminal-4126010156-r12 { fill: #e2e2e2 } + .terminal-4126010156-r13 { fill: #b93c5b } + .terminal-4126010156-r14 { fill: #454a50 } + .terminal-4126010156-r15 { fill: #166d39 } + .terminal-4126010156-r16 { fill: #e2e3e3;font-weight: bold } + .terminal-4126010156-r17 { fill: #3c8b54;font-weight: bold } + .terminal-4126010156-r18 { fill: #000000 } + .terminal-4126010156-r19 { fill: #5aa86f } + .terminal-4126010156-r20 { fill: #fea62b;font-weight: bold } + .terminal-4126010156-r21 { fill: #a7a9ab } + .terminal-4126010156-r22 { fill: #e2e3e3 } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - nf-core create + nf-core create - - - - nf-core create — Create a new pipeline with the nf-core pipeline template - - - 
Basic details - - - - - GitHub organisationWorkflow name - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - nf-core                                   Pipeline Name - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Must be lowercase without  - punctuation. - - - - A short description of your pipeline. - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - Description - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Cannot be left empty. - - - - Name of the main author / authors - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - Author(s) - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Cannot be left empty. - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  Back  Next  - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - - - - - - - - - -  d Toggle dark mode  q Quit  + + + + nf-core create — Create a new pipeline with the nf-core pipeline template + + + Basic details + + + + + GitHub organisationWorkflow name + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + nf-core                                   Pipeline Name + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Must be lowercase without  + punctuation. + + + + A short description of your pipeline. + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Description + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Cannot be left empty. + + + + Name of the main author / authors + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Author(s) + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Cannot be left empty. + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Back  Next  + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + + + + + + + + + +  d Toggle dark mode  q Quit  From 26cf2e87355569c20540a2ae50f40b175b3b972e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20H=C3=B6rtenhuber?= Date: Mon, 5 Aug 2024 15:31:04 +0000 Subject: [PATCH 36/65] update snapshots in codespaces --- tests/__snapshots__/test_create_app.ambr | 1044 +++++++++++++++++++--- 1 file changed, 904 insertions(+), 140 deletions(-) diff --git a/tests/__snapshots__/test_create_app.ambr b/tests/__snapshots__/test_create_app.ambr index d1fe59bf2d..2ad0772587 100644 --- a/tests/__snapshots__/test_create_app.ambr +++ b/tests/__snapshots__/test_create_app.ambr @@ -1382,9 +1382,567 @@ ''' # --- +# name: test_github_details + ''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nf-core create + + + + + + + + + + nf-core create — Create a new pipeline with the nf-core pipeline template + + + Create GitHub repository + +   Now that we have created a new pipeline locally, we can create a new GitHub repository and push    +   the code to it. 
+ + + + + Your GitHub usernameYour GitHub personal access token▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + for login. Show  + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + GitHub username••••••••••••                   + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + The name of the organisation where the The name of the new GitHub repository + GitHub repo will be cretaed + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + nf-core                               mypipeline                             + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + ⚠️ You can't create a repository directly in the nf-core organisation. + Please create the pipeline repo to an organisation where you have access or use your user  + account. A core-team member will be able to transfer the repo to nf-core once the development + has started. + + 💡 Your GitHub user account will be used by default if nf-core is given as the org name. + + + ▔▔▔▔▔▔▔▔Private + Select to make the new GitHub repo private. + ▁▁▁▁▁▁▁▁ + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Back  Create GitHub repo  Finish without creating a repo  + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + + + + + + +  d Toggle dark mode  q Quit  + + + + + ''' +# --- +# name: test_github_exit_message + ''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nf-core create + + + + + + + + + + nf-core create — Create a new pipeline with the nf-core pipeline template + + + HowTo create a GitHub repository + + + +                                           ,--./,-. +           ___     __   __   __   ___     /,-._.--~\  +     |\ | |__  __ /  ` /  \ |__) |__         }  { +     | \| |       \__, \__/ |  \ |___     \`-._,-`-, +                                           `._,._,' + +   If you would like to create the GitHub repository later, you can do it manually by following  +   these steps: + +  1. Create a new GitHub repository +  2. Add the remote to your local repository: + + + cd <pipeline_directory> + git remote add origin git@github.com:<username>/<repo_name>.git + + +  3. Push the code to the remote: + + + git push --all origin + + + 💡 Note the --all flag: this is needed to push all branches to the remote. + + + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Close  + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + + + + + + + + + +  d Toggle dark mode  q Quit  + + + + + ''' +# --- # name: test_github_question ''' - + - - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - nf-core create + nf-core create - - - - + + + + nf-core create — Create a new pipeline with the nf-core pipeline template + + + Create GitHub repository + + +   After creating the pipeline template locally, we can create a GitHub repository and push the  +   code to it. + +   Do you want to create a GitHub repository? 
+ + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Create GitHub repo  Finish without creating a repo  + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  d Toggle dark mode  q Quit  @@ -2022,256 +2787,255 @@ font-weight: 700; } - .terminal-4126010156-matrix { + .terminal-2071915686-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-4126010156-title { + .terminal-2071915686-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-4126010156-r1 { fill: #c5c8c6 } - .terminal-4126010156-r2 { fill: #e3e3e3 } - .terminal-4126010156-r3 { fill: #989898 } - .terminal-4126010156-r4 { fill: #e1e1e1 } - .terminal-4126010156-r5 { fill: #4ebf71;font-weight: bold } - .terminal-4126010156-r6 { fill: #a5a5a5;font-style: italic; } - .terminal-4126010156-r7 { fill: #1e1e1e } - .terminal-4126010156-r8 { fill: #0f4e2a } - .terminal-4126010156-r9 { fill: #7b3042 } - .terminal-4126010156-r10 { fill: #a7a7a7 } - .terminal-4126010156-r11 { fill: #787878 } - .terminal-4126010156-r12 { fill: #e2e2e2 } - .terminal-4126010156-r13 { fill: #b93c5b } - .terminal-4126010156-r14 { fill: #454a50 } - .terminal-4126010156-r15 { fill: #166d39 } - .terminal-4126010156-r16 { fill: #e2e3e3;font-weight: bold } - .terminal-4126010156-r17 { fill: #3c8b54;font-weight: bold } - .terminal-4126010156-r18 { fill: #000000 } - .terminal-4126010156-r19 { fill: #5aa86f } - .terminal-4126010156-r20 { fill: #fea62b;font-weight: bold } - .terminal-4126010156-r21 { fill: #a7a9ab } - .terminal-4126010156-r22 { fill: #e2e3e3 } + .terminal-2071915686-r1 { fill: #c5c8c6 } + .terminal-2071915686-r2 { fill: #e3e3e3 } + .terminal-2071915686-r3 { fill: #989898 } + .terminal-2071915686-r4 { fill: #e1e1e1 } + .terminal-2071915686-r5 { fill: #4ebf71;font-weight: bold } + .terminal-2071915686-r6 { fill: #a5a5a5;font-style: italic; } + .terminal-2071915686-r7 { fill: #1e1e1e } + .terminal-2071915686-r8 { fill: #0f4e2a } + .terminal-2071915686-r9 { fill: #7b3042 } + .terminal-2071915686-r10 { fill: #a7a7a7 } + .terminal-2071915686-r11 { fill: #787878 } + .terminal-2071915686-r12 { fill: #e2e2e2 } + .terminal-2071915686-r13 { fill: #b93c5b } + .terminal-2071915686-r14 { fill: #454a50 } + .terminal-2071915686-r15 { fill: #7ae998 } + .terminal-2071915686-r16 { fill: #e2e3e3;font-weight: bold } + .terminal-2071915686-r17 { fill: #000000 } + .terminal-2071915686-r18 { fill: #008139 } + .terminal-2071915686-r19 { fill: #fea62b;font-weight: bold } + .terminal-2071915686-r20 { fill: #a7a9ab } + .terminal-2071915686-r21 { fill: #e2e3e3 } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - nf-core create + nf-core create - - - - nf-core create — Create a new pipeline with the nf-core pipeline template - - - Basic details - - - - - GitHub organisationWorkflow name - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - nf-core                                   Pipeline Name - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Must be lowercase without  - punctuation. - - - - A short description of your pipeline. 
- - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - Description - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Cannot be left empty. - - - - Name of the main author / authors - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - Author(s) - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - Value error, Cannot be left empty. - - ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  Back  Next  - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - - - - - - - - - - - - -  d Toggle dark mode  q Quit  + + + + nf-core create — Create a new pipeline with the nf-core pipeline template + + + Basic details + + + + + GitHub organisationWorkflow name + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + nf-core                                   Pipeline Name + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Must be lowercase without  + punctuation. + + + + A short description of your pipeline. + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Description + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Cannot be left empty. + + + + Name of the main author / authors + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Author(s) + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + Value error, Cannot be left empty. + + ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Back  Next  + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + + + + + + + + + + + +  d Toggle dark mode  q Quit  From e1061de086841d50344db910f8111c90d73d868c Mon Sep 17 00:00:00 2001 From: mashehu Date: Fri, 16 Aug 2024 09:14:34 +0200 Subject: [PATCH 37/65] fix incorrect type --- nf_core/modules/modules_json.py | 2 +- nf_core/pipelines/rocrate.py | 154 +++++++++++++++++--------------- nf_core/utils.py | 6 +- 3 files changed, 89 insertions(+), 73 deletions(-) diff --git a/nf_core/modules/modules_json.py b/nf_core/modules/modules_json.py index e0b741930f..0dbd87f776 100644 --- a/nf_core/modules/modules_json.py +++ b/nf_core/modules/modules_json.py @@ -432,7 +432,7 @@ def move_component_to_local(self, component_type: str, component: str, repo_name to_name += f"-{datetime.datetime.now().strftime('%y%m%d%H%M%S')}" shutil.move(str(current_path), local_dir / to_name) - def unsynced_components(self) -> Tuple[List[str], List[str], dict]: + def unsynced_components(self) -> Tuple[List[str], List[str], Dict]: """ Compute the difference between the modules/subworkflows in the directory and the modules/subworkflows in the 'modules.json' file. 
This is done by looking at all diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 0bb573da81..67c109a86f 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -5,9 +5,8 @@ import os import sys import tempfile -from datetime import datetime from pathlib import Path -from typing import Dict, List, Optional, Set, Union, cast +from typing import Optional, Set, Union from urllib.parse import quote import requests @@ -44,7 +43,7 @@ def __init__(self, pipeline_dir: Path, version="") -> None: self.pipeline_dir = pipeline_dir self.version: str = version self.crate: rocrate.rocrate.ROCrate - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj = Pipeline(self.pipeline_dir) self.pipeline_obj._load() self.pipeline_obj.schema_obj = PipelineSchema() # Assume we're in a pipeline dir root if schema path not set @@ -83,9 +82,12 @@ def create_ro_crate( if self.version != self.pipeline_obj.nf_config.get("manifest.version"): # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") + if self.pipeline_obj.repo is None: + log.error(f"Pipeline repository not found in {self.pipeline_dir}") + sys.exit(1) try: self.pipeline_obj.repo.git.checkout(self.version) - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj = Pipeline(self.pipeline_dir) self.pipeline_obj._load() except InvalidGitRepositoryError: log.error(f"Could not find a git repository in {self.pipeline_dir}") @@ -184,14 +186,19 @@ def make_workflow_ro_crate(self) -> None: self.crate.CreativeWorkStatus = "InProgress" else: self.crate.CreativeWorkStatus = "Stable" - tags = self.pipeline_obj.repo.tags - if tags: - # get the tag for this version - for tag in tags: - if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: - self.crate.root_dataset.append_to( - "dateCreated", tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), compact=True - ) + if self.pipeline_obj.repo is None: + log.error(f"Pipeline repository not found in {self.pipeline_dir}") + else: + tags = self.pipeline_obj.repo.tags + if tags: + # get the tag for this version + for tag in tags: + if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: + self.crate.root_dataset.append_to( + "dateCreated", + tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), + compact=True, + ) self.crate.add_jsonld( {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} @@ -206,65 +213,72 @@ def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_file( + self.crate.add_workflow( # sets @type and conformsTo according to Workflow RO-Crate spec main_entity_filename, - properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + dest_path=main_entity_filename, + main=True, + lang="nextflow", # adds the #nextflow entity automatically and connects it to programmingLanguage + lang_version="X.Y.Z", # sets version on #nextflow ) - wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - - wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) - wf_file.append_to( - "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True - ) - # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", 
self.crate.root_dataset.get("dateCreated", ""), compact=True) - wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) - if self.version.endswith("dev"): - url = "dev" - else: - url = self.version - wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) - wf_file.append_to("version", self.version, compact=True) - if self.pipeline_obj.schema_obj is not None: - log.debug("input value") - - schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ - "input" - ] - input_value: Dict[str, Union[str, List[str], bool]] = { - "@id": "#input", - "@type": ["PropertyValueSpecification", "FormalParameter"], - "default": schema_input.get("default", ""), - "encodingFormat": schema_input.get("mimetype", ""), - "valueRequired": "input" - in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], - "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", - } - self.crate.add_jsonld(input_value) - wf_file.append_to( - "input", - {"@id": "#input"}, - ) - - # get keywords from nf-core website - remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] - # go through all remote workflows and find the one that matches the pipeline name - topics = ["nf-core", "nextflow"] - for remote_wf in remote_workflows: - if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): - topics = topics + remote_wf["topics"] - break - - log.debug(f"Adding topics: {topics}") - wf_file.append_to("keywords", topics) - - self.add_main_authors(wf_file) - - self.crate.mainEntity = wf_file - - wf_file.append_to("license", self.crate.license) - wf_file.append_to("name", self.crate.name) + # wf_file = self.crate.add_file( + # main_entity_filename, + # properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + # ) + # wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually + + # wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) + # wf_file.append_to( + # "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + # ) + # # add dateCreated and dateModified, based on the current data + # wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) + # wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + # wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) + # if self.version.endswith("dev"): + # url = "dev" + # else: + # url = self.version + # wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) + # wf_file.append_to("version", self.version, compact=True) + # if self.pipeline_obj.schema_obj is not None: + # log.debug("input value") + + # schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + # "input" + # ] + # input_value: Dict[str, Union[str, List[str], bool]] = { + # "@id": "#input", + # "@type": ["PropertyValueSpecification", "FormalParameter"], + # "default": schema_input.get("default", ""), + # "encodingFormat": schema_input.get("mimetype", ""), + # "valueRequired": "input" + # in 
self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + # "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + # } + # self.crate.add_jsonld(input_value) + # wf_file.append_to( + # "input", + # {"@id": "#input"}, + # ) + + # # get keywords from nf-core website + # remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # # go through all remote workflows and find the one that matches the pipeline name + # topics = ["nf-core", "nextflow"] + # for remote_wf in remote_workflows: + # if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + # topics = topics + remote_wf["topics"] + # break + + # log.debug(f"Adding topics: {topics}") + # wf_file.append_to("keywords", topics) + + # self.add_main_authors(wf_file) + + # self.crate.mainEntity = wf_file + + # wf_file.append_to("license", self.crate.license) + # wf_file.append_to("name", self.crate.name) def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ @@ -284,7 +298,7 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: # look at git contributors for author names try: git_contributors: Set[str] = set() - + assert self.pipeline_obj.repo is not None # mypy commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: diff --git a/nf_core/utils.py b/nf_core/utils.py index 3778f8acb3..21f44e40e0 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -35,6 +35,7 @@ from rich.spinner import Spinner import nf_core +from nf_core.pipelines.schema import PipelineSchema log = logging.getLogger(__name__) @@ -165,7 +166,8 @@ def __init__(self, wf_path: Path) -> None: self.wf_path = Path(wf_path) self.pipeline_name: Optional[str] = None self.pipeline_prefix: Optional[str] = None - self.schema_obj: Optional[Dict] = None + self.schema_obj: Optional[PipelineSchema] = None + self.repo: Optional[git.Repo] = None try: repo = git.Repo(self.wf_path) @@ -1205,7 +1207,7 @@ def get_first_available_path(directory: Union[Path, str], paths: List[str]) -> U return None -def sort_dictionary(d: dict) -> dict: +def sort_dictionary(d: Dict) -> Dict: """Sorts a nested dictionary recursively""" result = {} for k, v in sorted(d.items()): From 7793a9f175359f310df2e6de3c257b96c3745072 Mon Sep 17 00:00:00 2001 From: mashehu Date: Fri, 16 Aug 2024 12:15:51 +0200 Subject: [PATCH 38/65] switch to add_workflow method to add main entity and add components as datasets with descriptions --- nf_core/pipelines/rocrate.py | 192 +++++++++++++++++++---------------- nf_core/utils.py | 10 +- 2 files changed, 113 insertions(+), 89 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 67c109a86f..cb5672777d 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -5,8 +5,9 @@ import os import sys import tempfile +from datetime import datetime from pathlib import Path -from typing import Optional, Set, Union +from typing import Dict, List, Optional, Set, Union from urllib.parse import quote import requests @@ -133,20 +134,6 @@ def make_workflow_ro_crate(self) -> None: # Create the RO Crate object self.crate = rocrate.rocrate.ROCrate() - # Set language type - programming_language = rocrate.model.entity.Entity( - self.crate, - "#nextflow", - properties={ - "@type": ["ComputerLanguage", "SoftwareApplication"], - "name": "Nextflow", - "url": "https://www.nextflow.io/", - "identifier": "https://www.nextflow.io/", - 
"version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), - }, - ) - self.crate.add(programming_language) - # Conform to RO-Crate 1.1 and workflowhub-ro-crate self.crate.update_jsonld( { @@ -218,67 +205,66 @@ def set_main_entity(self, main_entity_filename: str): dest_path=main_entity_filename, main=True, lang="nextflow", # adds the #nextflow entity automatically and connects it to programmingLanguage - lang_version="X.Y.Z", # sets version on #nextflow + lang_version=self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), + ) + + self.crate.mainEntity.append_to( + "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + ) + # add dateCreated and dateModified, based on the current data + self.crate.mainEntity.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) + self.crate.mainEntity.append_to( + "dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True ) - # wf_file = self.crate.add_file( - # main_entity_filename, - # properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, - # ) - # wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - - # wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) - # wf_file.append_to( - # "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True - # ) - # # add dateCreated and dateModified, based on the current data - # wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) - # wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - # wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) - # if self.version.endswith("dev"): - # url = "dev" - # else: - # url = self.version - # wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) - # wf_file.append_to("version", self.version, compact=True) - # if self.pipeline_obj.schema_obj is not None: - # log.debug("input value") - - # schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ - # "input" - # ] - # input_value: Dict[str, Union[str, List[str], bool]] = { - # "@id": "#input", - # "@type": ["PropertyValueSpecification", "FormalParameter"], - # "default": schema_input.get("default", ""), - # "encodingFormat": schema_input.get("mimetype", ""), - # "valueRequired": "input" - # in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], - # "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", - # } - # self.crate.add_jsonld(input_value) - # wf_file.append_to( - # "input", - # {"@id": "#input"}, - # ) - - # # get keywords from nf-core website - # remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] - # # go through all remote workflows and find the one that matches the pipeline name - # topics = ["nf-core", "nextflow"] - # for remote_wf in remote_workflows: - # if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): - # topics = topics + remote_wf["topics"] - # break - - # log.debug(f"Adding topics: {topics}") - # wf_file.append_to("keywords", topics) - - # self.add_main_authors(wf_file) - - # self.crate.mainEntity = wf_file - - # wf_file.append_to("license", self.crate.license) - # wf_file.append_to("name", 
self.crate.name) + self.crate.mainEntity.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) + if self.version.endswith("dev"): + url = "dev" + else: + url = self.version + self.crate.mainEntity.append_to( + "url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True + ) + self.crate.mainEntity.append_to("version", self.version, compact=True) + if self.pipeline_obj.schema_obj is not None: + log.debug("input value") + + schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + "input" + ] + input_value: Dict[str, Union[str, List[str], bool]] = { + "@id": "#input", + "@type": ["PropertyValueSpecification", "FormalParameter"], + "default": schema_input.get("default", ""), + "encodingFormat": schema_input.get("mimetype", ""), + "valueRequired": "input" + in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + } + self.crate.add_jsonld(input_value) + self.crate.mainEntity.append_to( + "input", + {"@id": "#input"}, + ) + + # get keywords from nf-core website + remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # go through all remote workflows and find the one that matches the pipeline name + topics = ["nf-core", "nextflow"] + for remote_wf in remote_workflows: + assert self.pipeline_obj.pipeline_name is not None # mypy + if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + topics = topics + remote_wf["topics"] + break + + log.debug(f"Adding topics: {topics}") + self.crate.mainEntity.append_to("keywords", topics) + + # self.add_main_authors(self.crate.mainEntity) + + self.crate.mainEntity = self.crate.mainEntity + + self.crate.mainEntity.append_to("license", self.crate.license) + self.crate.mainEntity.append_to("name", self.crate.name) def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ @@ -358,12 +344,48 @@ def add_workflow_files(self): # exclude github action files wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/") and not fn == "main.nf"] log.debug(f"Adding {len(wf_filenames)} workflow files") + # find all main.nf files inside modules/nf-core and subworkflows/nf-core + component_files = [ + fn + for fn in wf_filenames + if ((fn.startswith("modules/nf-core") or fn.startswith("subworkflows/nf-core")) and fn.endswith("main.nf")) + ] + + wf_dirs = [str(Path(fn).parent) for fn in component_files] + for wf_dir in wf_dirs: + if Path(wf_dir).exists(): + log.debug(f"Adding workflow directory: {wf_dir}") + component_type = wf_dir.split("/")[0] + component_name = wf_dir.replace(component_type + "/nf-core/", "").replace("/", "_") + self.crate.add_directory( + wf_dir, + dest_path=wf_dir, + properties={ + "description": f"nf-core {component_type} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." 
+ }, + ) + wf_locals = [ + str(Path(fn).parent) + for fn in wf_filenames + if fn.startswith("modules/local") or fn.startswith("subworkflows/local") and fn.endswith("main.nf") + ] + + for wf_dir in wf_locals: + log.debug(f"Adding workflow directory: {wf_dir}") + component_type = wf_dir.split("/")[0].rstrip("s") + component_name = wf_dir.replace(component_type + "/local/", "").replace("/", "_") + + self.crate.add_directory(wf_dir, dest_path=wf_dir, properties={"description": f"local {component_type}"}) + # go through all files that are not part of directories inside wf_dirs + wf_filenames = [ + fn for fn in wf_filenames if not any(fn.startswith(str(wf_dir)) for wf_dir in wf_dirs + wf_locals) + ] for fn in wf_filenames: # add nextflow language to .nf and .config files - # if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test") and not fn.endswith("main.nf"): - # log.debug(f"Adding workflow file: {fn}") - # self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - # continue + if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, dest_path=fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) @@ -379,13 +401,13 @@ def add_workflow_files(self): self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + log.debug(f"Adding file: {fn}") + self.crate.add_file(fn, dest_path=fn, properties={"encodingFormat": "text/markdown"}) + continue + else: + log.debug(f"Adding file: {fn}") + self.crate.add_file(fn, dest_path=fn) continue - # else: - # log.debug(f"Adding workflow file: {fn}") - # self.crate.add_file(fn) - # continue def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" diff --git a/nf_core/utils.py b/nf_core/utils.py index 21f44e40e0..7eb30a0cbb 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -19,7 +19,7 @@ import time from contextlib import contextmanager from pathlib import Path -from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Tuple, Union import git import prompt_toolkit.styles @@ -35,7 +35,9 @@ from rich.spinner import Spinner import nf_core -from nf_core.pipelines.schema import PipelineSchema + +if TYPE_CHECKING: + from nf_core.pipelines.schema import PipelineSchema log = logging.getLogger(__name__) @@ -170,8 +172,8 @@ def __init__(self, wf_path: Path) -> None: self.repo: Optional[git.Repo] = None try: - repo = git.Repo(self.wf_path) - self.git_sha = repo.head.object.hexsha + self.repo = git.Repo(self.wf_path) + self.git_sha = self.repo.head.object.hexsha except Exception as e: log.debug(f"Could not find git hash for pipeline: {self.wf_path}. 
{e}") From f54becb4913d2d8174723713b75704705d4dff66 Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 27 Aug 2024 15:55:40 +0200 Subject: [PATCH 39/65] ro_crate -> rocrate --- nf_core/__main__.py | 12 ++++++------ nf_core/commands_pipelines.py | 6 +++--- nf_core/pipelines/rocrate.py | 21 ++++++++++++--------- tests/test_rocrate.py | 2 +- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 243ef736d0..16d7c3c565 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -36,7 +36,7 @@ pipelines_launch, pipelines_lint, pipelines_list, - pipelines_ro_crate, + pipelines_rocrate, pipelines_schema_build, pipelines_schema_docs, pipelines_schema_lint, @@ -87,7 +87,7 @@ }, { "name": "For developers", - "commands": ["create", "lint", "bump-version", "sync", "schema", "ro-crate", "create-logo"], + "commands": ["create", "lint", "bump-version", "sync", "schema", "rocrate", "create-logo"], }, ], "nf-core modules": [ @@ -581,8 +581,8 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): pipelines_list(ctx, keywords, sort, json, show_archived) -# nf-core pipelines ro-crate -@pipelines.command("ro-crate") +# nf-core pipelines rocrate +@pipelines.command("rocrate") @click.argument( "pipeline_dir", type=click.Path(exists=True), @@ -606,7 +606,7 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): default="", ) @click.pass_context -def ro_crate( +def rocrate( ctx, pipeline_dir: str, json_path: str, @@ -616,7 +616,7 @@ def ro_crate( """ Make an Research Object Crate """ - pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) + pipelines_rocrate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) # nf-core pipelines sync diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index 78bb6c86db..7cb1285be4 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -279,8 +279,8 @@ def pipelines_list(ctx, keywords, sort, json, show_archived): stdout.print(list_workflows(keywords, sort, json, show_archived)) -# nf-core pipelines ro-crate -def pipelines_ro_crate( +# nf-core pipelines rocrate +def pipelines_rocrate( ctx, pipeline_dir: Union[str, Path], json_path: Optional[Union[str, Path]], @@ -300,7 +300,7 @@ def pipelines_ro_crate( zip_path = Path(zip_path) try: rocrate_obj = ROCrate(pipeline_dir, pipeline_version) - rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) + rocrate_obj.create_rocrate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) except (UserWarning, LookupError, FileNotFoundError) as e: log.error(e) sys.exit(1) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index cb5672777d..96e9dc9ca0 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -3,15 +3,14 @@ import logging import os +import re import sys import tempfile from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Set, Union -from urllib.parse import quote import requests -import rocrate.model.entity import rocrate.rocrate from git import GitCommandError, InvalidGitRepositoryError from rich.progress import BarColumn, Progress @@ -53,7 +52,7 @@ def __init__(self, pipeline_dir: Path, version="") -> None: setup_requests_cachedir() - def create_ro_crate( + def create_rocrate( self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None ) -> None: """ @@ -97,7 +96,7 @@ def create_ro_crate( log.error(f"Could not 
checkout version {self.version}") sys.exit(1) self.version = self.pipeline_obj.nf_config.get("manifest.version", "") - self.make_workflow_ro_crate() + self.make_workflow_rocrate() # Save just the JSON metadata file if metadata_path is not None: @@ -124,7 +123,7 @@ def create_ro_crate( # Change back to the original directory os.chdir(current_path) - def make_workflow_ro_crate(self) -> None: + def make_workflow_rocrate(self) -> None: """ Create an RO Crate for a pipeline """ @@ -259,7 +258,7 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") self.crate.mainEntity.append_to("keywords", topics) - # self.add_main_authors(self.crate.mainEntity) + self.add_main_authors(self.crate.mainEntity) self.crate.mainEntity = self.crate.mainEntity @@ -324,9 +323,14 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: for author in named_contributors: log.debug(f"Adding author: {author}") + assert self.pipeline_obj.repo is not None # mypy + # get email from git log + email = self.pipeline_obj.repo.git.log(f"--author={author}", "--pretty=format:%ae", "-1") orcid = get_orcid(author) author_entitity = self.crate.add( - Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) + Person( + self.crate, orcid if orcid is not None else "#" + email, properties={"name": author, "email": email} + ) ) wf_file.append_to("creator", author_entitity) if author in authors: @@ -336,7 +340,6 @@ def add_workflow_files(self): """ Add workflow files to the RO Crate """ - import re import nf_core.utils @@ -361,7 +364,7 @@ def add_workflow_files(self): wf_dir, dest_path=wf_dir, properties={ - "description": f"nf-core {component_type} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." + "description": f"nf-core {re.sub('s$', '', component_type)} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." 
}, ) wf_locals = [ diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index eac61fac48..6defd5d5e8 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -54,7 +54,7 @@ def test_rocrate_creation(self): # Run the command self.rocrate_obj = nf_core.pipelines.rocrate.ROCrate(self.test_pipeline_dir) - self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) + self.rocrate_obj.create_rocrate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) # Check that the crate was created self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists()) From 6645e4cb3e4dab93b16964f15e70a60f24390044 Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 16 Jan 2024 09:46:48 +0100 Subject: [PATCH 40/65] second attempt to add ro crates --- nf_core/ro_crate.py | 46 +++++++++++++++++++++++++++++++++++++++++++++ nf_core/utils.py | 2 +- 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 nf_core/ro_crate.py diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py new file mode 100644 index 0000000000..5e205fa056 --- /dev/null +++ b/nf_core/ro_crate.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +""" Code to deal with pipeline RO (Research Object) Crates """ + + +import logging +import tempfile +from pathlib import Path + +import rocrate.model.entity +import rocrate.rocrate +from typing import Union + +log = logging.getLogger(__name__) + + +class RoCrate: + """Class to generate an RO Crate for a pipeline""" + + def __init__(self, pipeline_dir: Union[str, Path], version=""): + self.pipeline_dir = pipeline_dir + self.version = version + + def create_ro_create(self, outdir: Path, metadata_fn="", zip_fn=""): + """Create an RO Crate for the pipeline""" + + # Create a temporary directory for the RO Crate + rocrate_dir = tempfile.mkdtemp(prefix="nf-core-ro-crate-") + + # Create the RO Crate + wf_crate = rocrate.rocrate.ROCrate(rocrate_dir) + + # Set main entity file + wf_file = wf_crate.add_file(Path(self.pipeline_dir, "nextflow.config"), "nextflow.config") + wf_crate.mainEntity = wf_file + + # Set language type + programming_language = rocrate.model.entity.Entity( + wf_crate, + "https://www.nextflow.io/", + properties={ + "@type": ["ComputerLanguage", "SoftwareApplication"], + "name": "Nextflow", + "url": "https://www.nextflow.io/", + }, + ) + wf_crate.add(programming_language) diff --git a/nf_core/utils.py b/nf_core/utils.py index 87dd307e70..b1ff4d0058 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -254,7 +254,7 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: """ log.debug(f"Got '{wf_path}' as path") - + wf_path = Path(wf_path) config = {} cache_fn = None cache_basedir = None From 553135afb7c86f1eb9dc6f2e2925018b7b4fbe03 Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 16 Jan 2024 09:49:01 +0100 Subject: [PATCH 41/65] fix import --- nf_core/ro_crate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index 5e205fa056..abdb926e6f 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -5,10 +5,10 @@ import logging import tempfile from pathlib import Path +from typing import Union import rocrate.model.entity import rocrate.rocrate -from typing import Union log = logging.getLogger(__name__) From bf30f0d40f1e238ad627ee63872b5df5d88c5e8e Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 23 Jan 2024 18:21:53 +0100 Subject: [PATCH 42/65] first running version --- nf_core/pipelines/schema.py | 4 +- nf_core/ro_crate.py | 173 
+++++++++++++++++++++++++++++++++--- nf_core/utils.py | 93 ++++++++++++++++--- 3 files changed, 244 insertions(+), 26 deletions(-) diff --git a/nf_core/pipelines/schema.py b/nf_core/pipelines/schema.py index 127aa123dc..cde4af3fd5 100644 --- a/nf_core/pipelines/schema.py +++ b/nf_core/pipelines/schema.py @@ -43,7 +43,7 @@ def __init__(self): self.schema_from_scratch = False self.no_prompts = False self.web_only = False - self.web_schema_build_url = "https://nf-co.re/pipeline_schema_builder" + self.web_schema_build_url = "https://oldsite.nf-co.re/pipeline_schema_builder" self.web_schema_build_web_url = None self.web_schema_build_api_url = None self.validation_plugin = None @@ -956,6 +956,7 @@ def launch_web_builder(self): """ Send pipeline schema to web builder and wait for response """ + content = { "post_content": "json_schema", "api": "true", @@ -964,6 +965,7 @@ def launch_web_builder(self): "schema": json.dumps(self.schema), } web_response = nf_core.utils.poll_nfcore_web_api(self.web_schema_build_url, content) + try: if "api_url" not in web_response: raise AssertionError('"api_url" not in web_response') diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index abdb926e6f..fe8bc6a998 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -7,8 +7,12 @@ from pathlib import Path from typing import Union +import requests import rocrate.model.entity import rocrate.rocrate +from rocrate.model.person import Person + +from nf_core.utils import Pipeline log = logging.getLogger(__name__) @@ -16,31 +20,178 @@ class RoCrate: """Class to generate an RO Crate for a pipeline""" - def __init__(self, pipeline_dir: Union[str, Path], version=""): + def __init__(self, pipeline_dir: Path, version=""): + from nf_core.utils import is_pipeline_directory + + is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir self.version = version + self.crate: rocrate.rocrate.ROCrate + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() - def create_ro_create(self, outdir: Path, metadata_fn="", zip_fn=""): + def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): """Create an RO Crate for the pipeline""" - # Create a temporary directory for the RO Crate - rocrate_dir = tempfile.mkdtemp(prefix="nf-core-ro-crate-") + # Set input paths + self.get_crate_paths(outdir) + + self.make_workflow_ro_crate(self.pipeline_dir) + + # Save just the JSON metadata file + if metadata_fn is not None: + log.info(f"Saving metadata file '{metadata_fn}'") + # Save the crate to a temporary directory + tmpdir = Path(tempfile.mkdtemp(), "wf") + self.crate.write(tmpdir) + # Now save just the JSON file + crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") + crate_json_fn.rename(metadata_fn) + + # Save the whole crate zip file + if zip_fn is not None: + log.info(f"Saving zip file '{zip_fn}'") + self.crate.write_zip(zip_fn) + + def make_workflow_ro_crate(self, path: Path): + import nf_core.utils + + if self.pipeline_obj is None: + raise ValueError("Pipeline object not loaded") # Create the RO Crate - wf_crate = rocrate.rocrate.ROCrate(rocrate_dir) + self.crate = rocrate.rocrate.ROCrate() - # Set main entity file - wf_file = wf_crate.add_file(Path(self.pipeline_dir, "nextflow.config"), "nextflow.config") - wf_crate.mainEntity = wf_file + # Conform to RO-Crate 1.1 and workflowhub-ro-crate # Set language type programming_language = rocrate.model.entity.Entity( - wf_crate, - "https://www.nextflow.io/", + self.crate, + "#nextflow", properties={ "@type": ["ComputerLanguage", 
"SoftwareApplication"], "name": "Nextflow", "url": "https://www.nextflow.io/", + "identifier": "https://www.nextflow.io/", + "version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), }, ) - wf_crate.add(programming_language) + self.crate.add(programming_language) + self.crate.update_jsonld( + { + "@id": "ro-crate-metadata.json", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate/1.1"}, + {"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}, + ], + } + ) + + # Set main entity file + wf_file = self.crate.add_jsonld( + { + "@id": "main.nf", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + }, + ) + self.crate.mainEntity = wf_file + # self.crate.update_jsonld({"@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}) + + self.add_authors(wf_file) + wf_file.append_to("programmingLanguage", programming_language) + + # add readme as description + readme = Path(self.pipeline_dir, "README.md") + self.crate.description = readme.read_text() + + self.crate.license = "MIT" + + # add doi as identifier + # self.crate.identifier = self.pipeline_obj.get("manifest", {}).get("doi", "") + self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' + + if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): + self.crate.CreativeWorkStatus = "InProgress" + else: + self.crate.CreativeWorkStatus = "Stable" + + # Add all other files + wf_filenames = nf_core.utils.get_wf_files(self.pipeline_dir) + log.debug(f"Adding {len(wf_filenames)} workflow files") + for fn in wf_filenames: + # check if it wasn't already added + if fn == "main.nf": + continue + # add nextflow language to .nf and .config files + if fn.endswith(".nf") or fn.endswith(".config"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + if fn.endswith(".png"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"@type": ["File", "ImageObject"]}) + if "metro_map" in fn: + log.info(f"Setting main entity image to: {fn}") + wf_file.append_to("image", {"@id": fn}) + if fn.endswith(".md"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + else: + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn) + + # Add keywords from github topics + + def add_authors(self, wf_file): + """ + Add workflow authors to the crate + NB: We don't have much metadata here - scope to improve in the future + """ + # add author entity to crate + + try: + authors = self.pipeline_obj.nf_config["manifest.author"].split(",") + except KeyError: + log.error("No author field found in manifest of nextflow.config") + return + for author in authors: + log.debug(f"Adding author: {author}") + orcid = get_orcid(author) + author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) + wf_file.append_to("author", author_entitity) + + def get_crate_paths(self, path): + """Given a pipeline name, directory, or path, set wf_crate_filename""" + + path = Path(path) + + if path.is_dir(): + self.pipeline_dir = path + # wf_crate_filename = path / "ro-crate-metadata.json" + elif path.is_file(): + self.pipeline_dir = path.parent + # wf_crate_filename = path + + # Check that the schema file exists + if self.pipeline_dir is None: + raise OSError(f"Could not find pipeline '{path}'") + + +def get_orcid(name: str) -> Union[str, None]: + base_url = 
"https://pub.orcid.org/v3.0/search/" + headers = { + "Accept": "application/json", + } + params = {"q": f'family-name:"{name.split()[-1]}" AND given-names:"{name.split()[0]}"'} + response = requests.get(base_url, params=params, headers=headers) + + if response.status_code == 200: + json_response = response.json() + if json_response.get("num-found") == 1: + orcid_uri = json_response.get("result")[0].get("orcid-identifier", {}).get("uri") + log.info(f"Using found ORCID for {name}. Please double-check: {orcid_uri}") + return orcid_uri + else: + log.debug(f"No exact ORCID found for {name}. See {response.url}") + return None + else: + return f"API request unsuccessful. Status code: {response.status_code}" diff --git a/nf_core/utils.py b/nf_core/utils.py index b1ff4d0058..74dfe40238 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -5,6 +5,7 @@ import concurrent.futures import datetime import errno +import fnmatch import hashlib import io import json @@ -52,14 +53,29 @@ [ ("qmark", "fg:ansiblue bold"), # token in front of the question ("question", "bold"), # question text - ("answer", "fg:ansigreen nobold bg:"), # submitted answer text behind the question - ("pointer", "fg:ansiyellow bold"), # pointer used in select and checkbox prompts - ("highlighted", "fg:ansiblue bold"), # pointed-at choice in select and checkbox prompts - ("selected", "fg:ansiyellow noreverse bold"), # style for a selected item of a checkbox + ( + "answer", + "fg:ansigreen nobold bg:", + ), # submitted answer text behind the question + ( + "pointer", + "fg:ansiyellow bold", + ), # pointer used in select and checkbox prompts + ( + "highlighted", + "fg:ansiblue bold", + ), # pointed-at choice in select and checkbox prompts + ( + "selected", + "fg:ansiyellow noreverse bold", + ), # style for a selected item of a checkbox ("separator", "fg:ansiblack"), # separator in lists ("instruction", ""), # user instructions for select, rawselect, checkbox ("text", ""), # plain text - ("disabled", "fg:gray italic"), # disabled choices for select and checkbox prompts + ( + "disabled", + "fg:gray italic", + ), # disabled choices for select and checkbox prompts ("choice-default", "fg:ansiblack"), ("choice-default-changed", "fg:ansiyellow"), ("choice-required", "fg:ansired"), @@ -79,7 +95,11 @@ def fetch_remote_version(source_url): return remote_version -def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"): +def check_if_outdated( + current_version=None, + remote_version=None, + source_url="https://nf-co.re/tools_version", +): """ Check if the current version of nf-core is outdated """ @@ -441,6 +461,7 @@ def poll_nfcore_web_api(api_url: str, post_data: Optional[Dict] = None) -> Dict: if post_data is None: response = requests.get(api_url, headers={"Cache-Control": "no-cache"}) else: + log.debug(f"requesting {api_url} with {post_data}") response = requests.post(url=api_url, data=post_data) except requests.exceptions.Timeout: raise AssertionError(f"URL timed out: {api_url}") @@ -526,7 +547,8 @@ def __call__(self, r): with open(gh_cli_config_fn) as fh: gh_cli_config = yaml.safe_load(fh) self.auth = requests.auth.HTTPBasicAuth( - gh_cli_config["github.com"]["user"], gh_cli_config["github.com"]["oauth_token"] + gh_cli_config["github.com"]["user"], + gh_cli_config["github.com"]["oauth_token"], ) self.auth_mode = f"gh CLI config: {gh_cli_config['github.com']['user']}" except Exception: @@ -794,12 +816,18 @@ def get_tag_date(tag_date): # Obtain version and build match = 
re.search(r"(?::)+([A-Za-z\d\-_.]+)", img["image_name"]) if match is not None: - all_docker[match.group(1)] = {"date": get_tag_date(img["updated"]), "image": img} + all_docker[match.group(1)] = { + "date": get_tag_date(img["updated"]), + "image": img, + } elif img["image_type"] == "Singularity": # Obtain version and build match = re.search(r"(?::)+([A-Za-z\d\-_.]+)", img["image_name"]) if match is not None: - all_singularity[match.group(1)] = {"date": get_tag_date(img["updated"]), "image": img} + all_singularity[match.group(1)] = { + "date": get_tag_date(img["updated"]), + "image": img, + } # Obtain common builds from Docker and Singularity images common_keys = list(all_docker.keys() & all_singularity.keys()) current_date = None @@ -929,13 +957,19 @@ def prompt_pipeline_release_branch( # Releases if len(wf_releases) > 0: for tag in map(lambda release: release.get("tag_name"), wf_releases): - tag_display = [("fg:ansiblue", f"{tag} "), ("class:choice-default", "[release]")] + tag_display = [ + ("fg:ansiblue", f"{tag} "), + ("class:choice-default", "[release]"), + ] choices.append(questionary.Choice(title=tag_display, value=tag)) tag_set.append(str(tag)) # Branches for branch in wf_branches.keys(): - branch_display = [("fg:ansiyellow", f"{branch} "), ("class:choice-default", "[branch]")] + branch_display = [ + ("fg:ansiyellow", f"{branch} "), + ("class:choice-default", "[branch]"), + ] choices.append(questionary.Choice(title=branch_display, value=branch)) tag_set.append(branch) @@ -966,7 +1000,8 @@ def validate(self, value): return True else: raise questionary.ValidationError( - message="Invalid remote cache index file", cursor_position=len(value.text) + message="Invalid remote cache index file", + cursor_position=len(value.text), ) else: return True @@ -996,7 +1031,13 @@ def get_repo_releases_branches(pipeline, wfs): pipeline = wf.full_name # Store releases and stop loop - wf_releases = list(sorted(wf.releases, key=lambda k: k.get("published_at_timestamp", 0), reverse=True)) + wf_releases = list( + sorted( + wf.releases, + key=lambda k: k.get("published_at_timestamp", 0), + reverse=True, + ) + ) break # Arbitrary GitHub repo @@ -1016,7 +1057,13 @@ def get_repo_releases_branches(pipeline, wfs): raise AssertionError(f"Not able to find pipeline '{pipeline}'") except AttributeError: # Success! 
We have a list, which doesn't work with .get() which is looking for a dict key - wf_releases = list(sorted(rel_r.json(), key=lambda k: k.get("published_at_timestamp", 0), reverse=True)) + wf_releases = list( + sorted( + rel_r.json(), + key=lambda k: k.get("published_at_timestamp", 0), + reverse=True, + ) + ) # Get release tag commit hashes if len(wf_releases) > 0: @@ -1348,3 +1395,21 @@ def set_wd(path: Path) -> Generator[None, None, None]: yield finally: os.chdir(start_wd) + + +def get_wf_files(wf_path: Path): + """Return a list of all files in a directory (ignores .gitigore files)""" + + wf_files = [] + + with open(Path(wf_path, ".gitignore")) as f: + lines = f.read().splitlines() + ignore = [line for line in lines if line and not line.startswith("#")] + + for path in Path(wf_path).rglob("*"): + if any(fnmatch.fnmatch(str(path), pattern) for pattern in ignore): + continue + if path.is_file(): + wf_files.append(str(path)) + + return wf_files From 9ed053a8c276fdfaedcfff72a27a5d6a8deecace Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 17:47:06 +0100 Subject: [PATCH 43/65] restructure code and add tests --- nf_core/ro_crate.py | 200 +++++++++++++++++++++++++++++++----------- tests/test_rocrate.py | 86 ++++++++++++++++++ 2 files changed, 234 insertions(+), 52 deletions(-) create mode 100644 tests/test_rocrate.py diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index fe8bc6a998..10c80e11cb 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -""" Code to deal with pipeline RO (Research Object) Crates """ - +"""Code to deal with pipeline RO (Research Object) Crates""" import logging import tempfile @@ -10,6 +9,7 @@ import requests import rocrate.model.entity import rocrate.rocrate +from git import GitCommandError, InvalidGitRepositoryError, Repo from rocrate.model.person import Person from nf_core.utils import Pipeline @@ -18,10 +18,17 @@ class RoCrate: - """Class to generate an RO Crate for a pipeline""" + """ + Class to generate an RO Crate for a pipeline + + Args: + pipeline_dir (Path): Path to the pipeline directory + version (str): Version of the pipeline to use + + """ def __init__(self, pipeline_dir: Path, version=""): - from nf_core.utils import is_pipeline_directory + from nf_core.utils import is_pipeline_directory, setup_requests_cachedir is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir @@ -30,13 +37,55 @@ def __init__(self, pipeline_dir: Path, version=""): self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() - def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): - """Create an RO Crate for the pipeline""" + setup_requests_cachedir() + + def create_ro_crate( + self, outdir: Path, metadata_fn: Union[str, None, Path] = None, zip_fn: Union[str, None] = None + ) -> None: + """ + Create an RO Crate for a pipeline + + Args: + outdir (Path): Path to the output directory + metadata_fn (str): Filename for the metadata file + zip_fn (str): Filename for the zip file + + """ + import os # Set input paths - self.get_crate_paths(outdir) + try: + self.set_crate_paths(outdir) + except OSError as e: + log.error(e) + sys.exit(1) + + # Change to the pipeline directory, because the RO Crate doesn't handle relative paths well + current_path = Path.cwd() + os.chdir(self.pipeline_dir) + + # Check that the checkout pipeline version is the same as the requested version + if self.version: + if self.version != self.pipeline_obj.nf_config.get("manifest.version"): + # using git checkout to 
get the requested version + log.info(f"Checking out pipeline version {self.version}") + try: + self.repo = Repo(self.pipeline_dir) + self.repo.git.checkout(self.version) + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() + except InvalidGitRepositoryError: + log.error(f"Could not find a git repository in {self.pipeline_dir}") + sys.exit(1) + except GitCommandError: + log.error(f"Could not checkout version {self.version}") + sys.exit(1) - self.make_workflow_ro_crate(self.pipeline_dir) + try: + self.make_workflow_ro_crate() + except Exception as e: + log.error(e) + sys.exit(1) # Save just the JSON metadata file if metadata_fn is not None: @@ -53,17 +102,22 @@ def create_ro_crate(self, outdir: Path, metadata_fn="", zip_fn=""): log.info(f"Saving zip file '{zip_fn}'") self.crate.write_zip(zip_fn) - def make_workflow_ro_crate(self, path: Path): - import nf_core.utils + # Change back to the original directory + os.chdir(current_path) + + def make_workflow_ro_crate(self) -> None: + """ + Create an RO Crate for a pipeline + Args: + path (Path): Path to the pipeline directory + """ if self.pipeline_obj is None: raise ValueError("Pipeline object not loaded") - # Create the RO Crate + # Create the RO Crate object self.crate = rocrate.rocrate.ROCrate() - # Conform to RO-Crate 1.1 and workflowhub-ro-crate - # Set language type programming_language = rocrate.model.entity.Entity( self.crate, @@ -77,6 +131,8 @@ def make_workflow_ro_crate(self, path: Path): }, ) self.crate.add(programming_language) + + # Conform to RO-Crate 1.1 and workflowhub-ro-crate self.crate.update_jsonld( { "@id": "ro-crate-metadata.json", @@ -88,26 +144,19 @@ def make_workflow_ro_crate(self, path: Path): ) # Set main entity file - wf_file = self.crate.add_jsonld( - { - "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - }, - ) - self.crate.mainEntity = wf_file - # self.crate.update_jsonld({"@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}) - - self.add_authors(wf_file) - wf_file.append_to("programmingLanguage", programming_language) + self.set_main_entity("main.nf") # add readme as description - readme = Path(self.pipeline_dir, "README.md") - self.crate.description = readme.read_text() + readme = Path("README.md") + + try: + self.crate.description = readme.read_text() + except FileNotFoundError: + log.error(f"Could not find README.md in {self.pipeline_dir}") self.crate.license = "MIT" # add doi as identifier - # self.crate.identifier = self.pipeline_obj.get("manifest", {}).get("doi", "") self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): @@ -116,32 +165,35 @@ def make_workflow_ro_crate(self, path: Path): self.crate.CreativeWorkStatus = "Stable" # Add all other files - wf_filenames = nf_core.utils.get_wf_files(self.pipeline_dir) - log.debug(f"Adding {len(wf_filenames)} workflow files") - for fn in wf_filenames: - # check if it wasn't already added - if fn == "main.nf": - continue - # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - if fn.endswith(".png"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"@type": ["File", "ImageObject"]}) - if "metro_map" in fn: - log.info(f"Setting main entity image to: {fn}") - 
wf_file.append_to("image", {"@id": fn}) - if fn.endswith(".md"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) - else: - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn) + self.add_workflow_files() - # Add keywords from github topics + def set_main_entity(self, main_entity_filename: str): + """ + Set the main.nf as the main entity of the crate and add necessary metadata + """ - def add_authors(self, wf_file): + wf_file = self.crate.add_jsonld( + { + "@id": main_entity_filename, + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + }, + ) + self.crate.mainEntity = wf_file + self.add_main_authors(wf_file) + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) + # get keywords from nf-core website + remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # go through all remote workflows and find the one that matches the pipeline name + topics = ["nf-core", "nextflow"] + for remote_wf in remote_workflows: + if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + topics = topics + remote_wf["topics"] + break + + log.debug(f"Adding topics: {topics}") + wf_file.append_to("keywords", topics) + + def add_main_authors(self, wf_file): """ Add workflow authors to the crate NB: We don't have much metadata here - scope to improve in the future @@ -159,7 +211,42 @@ def add_authors(self, wf_file): author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) wf_file.append_to("author", author_entitity) - def get_crate_paths(self, path): + def add_workflow_files(self): + """ + Add workflow files to the RO Crate + """ + import nf_core.utils + + wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) + # exclude github action files + wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] + log.debug(f"Adding {len(wf_filenames)} workflow files") + for fn in wf_filenames: + # skip main.nf + if fn == "main.nf": + continue + # add nextflow language to .nf and .config files + if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + continue + if fn.endswith(".png"): + log.debug(f"Adding workflow image file: {fn}") + self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) + if "metro_map" in fn: + log.info(f"Setting main entity image to: {fn}") + self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) + continue + if fn.endswith(".md"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + continue + else: + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn) + continue + + def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" path = Path(path) @@ -177,6 +264,15 @@ def get_crate_paths(self, path): def get_orcid(name: str) -> Union[str, None]: + """ + Get the ORCID for a given name + + Args: + name (str): Name of the author + + Returns: + str: ORCID URI or None + """ base_url = "https://pub.orcid.org/v3.0/search/" headers = { "Accept": "application/json", diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py new file mode 100644 index 0000000000..3db1876c04 --- /dev/null +++ b/tests/test_rocrate.py @@ -0,0 +1,86 @@ +""" Test the nf-core rocrate command """ + + 
+import shutil +import tempfile +import unittest +from pathlib import Path + +import rocrate.rocrate +from git import Repo + +import nf_core.create +import nf_core.rocrate +import nf_core.utils + + +class TestROCrate(unittest.TestCase): + """Class for lint tests""" + + def setUp(self): + """Function that runs at start of tests for common resources + + Use nf_core.create() to make a pipeline that we can use for testing + """ + + self.tmp_dir = Path(tempfile.mkdtemp()) + self.test_pipeline_dir = Path(self.tmp_dir, "nf-core-testpipeline") + self.create_obj = nf_core.create.PipelineCreate( + name="testpipeline", + description="This is a test pipeline", + author="Test McTestFace", + outdir=self.test_pipeline_dir, + version="1.0.0", + no_git=False, + force=True, + plain=True, + ) + self.create_obj.init_pipeline() + + # add fake metro map + Path(self.test_pipeline_dir, "docs", "images", "nf-core-testpipeline_metro_map.png").touch() + # commit the changes + repo = Repo(self.test_pipeline_dir) + repo.git.add(A=True) + repo.index.commit("Initial commit") + + def tearDown(self): + """Clean up temporary files and folders""" + + if self.tmp_dir.exists(): + shutil.rmtree(self.tmp_dir) + + def test_rocrate_creation(self): + """Run the nf-core rocrate command""" + + # Run the command + self.rocrate_obj = nf_core.rocrate.RoCrate(self.test_pipeline_dir) + self.rocrate_obj.create_ro_crate( + self.test_pipeline_dir, metadata_fn=Path(self.test_pipeline_dir, "ro-crate-metadata.json") + ) + + # Check that the crate was created + self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists()) + + # Check that the entries in the crate are correct + crate = rocrate.rocrate.ROCrate(str(self.test_pipeline_dir)) + entities = crate.get_entities() + + # Check if the correct entities are set: + for entity in entities: + entity_json = entity.as_jsonld() + if entity_json["@id"] == "./": + self.assertEqual(entity_json.get("name"), "Research Object Crate for nf-core/testpipeline") + self.assertEqual(entity_json["mainEntity"], {"@id": "#main.nf"}) + elif entity_json["@id"] == "#main.nf": + self.assertEqual(entity_json["programmingLanguage"], [{"@id": "#nextflow"}]) + self.assertEqual(entity_json["image"], [{"@id": "nf-core-testpipeline_metro_map.png"}]) + # assert there is a metro map + # elif entity_json["@id"] == "nf-core-testpipeline_metro_map.png": # FIXME waiting for https://github.com/ResearchObject/ro-crate-py/issues/174 + # self.assertEqual(entity_json["@type"], ["File", "ImageObject"]) + # assert that author is set as a person + elif "name" in entity_json and entity_json["name"] == "Test McTestFace": + self.assertEqual(entity_json["@type"], "Person") + # check that it is set as author of the main entity + if crate.mainEntity is not None: + self.assertEqual(crate.mainEntity["author"][0].id, entity_json["@id"]) From 56a170ce9b1268cf5199cc1ac70a82d25e6cd843 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 24 Jan 2024 18:13:27 +0100 Subject: [PATCH 44/65] add missing dep --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f167a55804..9d05a6ec3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ requests requests_cache rich-click==1.8.* rich>=13.3.1 +rocrate tabulate textual==0.71.0 trogon From 8419e9f1c5b0a8a814a618a89ddf982040d3cc69 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 12:27:32 +0100 Subject: [PATCH 45/65] add recommendations from comments --- nf_core/ro_crate.py | 51 
+++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index 10c80e11cb..9ae77cab1d 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -40,15 +40,15 @@ def __init__(self, pipeline_dir: Path, version=""): setup_requests_cachedir() def create_ro_crate( - self, outdir: Path, metadata_fn: Union[str, None, Path] = None, zip_fn: Union[str, None] = None + self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None ) -> None: """ Create an RO Crate for a pipeline Args: outdir (Path): Path to the output directory - metadata_fn (str): Filename for the metadata file - zip_fn (str): Filename for the zip file + metadata_path (Path): Path to the metadata file + zip_path (Path): Path to the zip file """ import os @@ -81,26 +81,29 @@ def create_ro_crate( log.error(f"Could not checkout version {self.version}") sys.exit(1) - try: - self.make_workflow_ro_crate() - except Exception as e: - log.error(e) - sys.exit(1) + self.make_workflow_ro_crate() # Save just the JSON metadata file - if metadata_fn is not None: - log.info(f"Saving metadata file '{metadata_fn}'") + if metadata_path is not None: + log.info(f"Saving metadata file '{metadata_path}'") # Save the crate to a temporary directory tmpdir = Path(tempfile.mkdtemp(), "wf") self.crate.write(tmpdir) # Now save just the JSON file crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") - crate_json_fn.rename(metadata_fn) + if metadata_path.name == "ro-crate-metadata.json": + crate_json_fn.rename(metadata_path) + else: + crate_json_fn.rename(metadata_path / "ro-crate-metadata.json") # Save the whole crate zip file - if zip_fn is not None: - log.info(f"Saving zip file '{zip_fn}'") - self.crate.write_zip(zip_fn) + if zip_path is not None: + if zip_path.name == "ro-crate.crate.zip": + log.info(f"Saving zip file '{zip_path}'") + self.crate.write_zip(zip_path) + else: + log.info(f"Saving zip file '{zip_path}/ro-crate.crate.zip;") + self.crate.write_zip(zip_path / "ro-crate.crate.zip") # Change back to the original directory os.chdir(current_path) @@ -108,9 +111,6 @@ def create_ro_crate( def make_workflow_ro_crate(self) -> None: """ Create an RO Crate for a pipeline - - Args: - path (Path): Path to the pipeline directory """ if self.pipeline_obj is None: raise ValueError("Pipeline object not loaded") @@ -153,8 +153,18 @@ def make_workflow_ro_crate(self) -> None: self.crate.description = readme.read_text() except FileNotFoundError: log.error(f"Could not find README.md in {self.pipeline_dir}") - - self.crate.license = "MIT" + # get license from LICENSE file + license_file = Path("LICENSE") + try: + license = license_file.read_text() + if license.startswith("MIT"): + self.crate.license = "MIT" + else: + # prompt for license + log.info("Could not determine license from LICENSE file") + self.crate.license = input("Please enter the license for this pipeline: ") + except FileNotFoundError: + log.error(f"Could not find LICENSE file in {self.pipeline_dir}") # add doi as identifier self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' @@ -290,4 +300,5 @@ def get_orcid(name: str) -> Union[str, None]: log.debug(f"No exact ORCID found for {name}. See {response.url}") return None else: - return f"API request unsuccessful. Status code: {response.status_code}" + log.info(f"API request to ORCID unsuccessful. 
Status code: {response.status_code}") + return None From bdbc045cf7ea9de20a313a742acb73babdc39981 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:03:17 +0100 Subject: [PATCH 46/65] add git contributors --- nf_core/ro_crate.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index 9ae77cab1d..ae3f6357ca 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -9,7 +9,7 @@ import requests import rocrate.model.entity import rocrate.rocrate -from git import GitCommandError, InvalidGitRepositoryError, Repo +from git import GitCommandError, InvalidGitRepositoryError from rocrate.model.person import Person from nf_core.utils import Pipeline @@ -70,8 +70,7 @@ def create_ro_crate( # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") try: - self.repo = Repo(self.pipeline_dir) - self.repo.git.checkout(self.version) + self.pipeline_obj.repo.git.checkout(self.version) self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() except InvalidGitRepositoryError: @@ -206,15 +205,36 @@ def set_main_entity(self, main_entity_filename: str): def add_main_authors(self, wf_file): """ Add workflow authors to the crate - NB: We don't have much metadata here - scope to improve in the future """ # add author entity to crate try: authors = self.pipeline_obj.nf_config["manifest.author"].split(",") + # remove spaces + authors = [a.strip() for a in authors] except KeyError: log.error("No author field found in manifest of nextflow.config") return + # look at git contributors for author names + try: + contributors = set() + + commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) + + for commit in commits_touching_path: + contributors.add(commit.author.name) + # exclude bots + contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] + # remove usernames (just keep names with spaces) + contributors = [c for c in contributors if " " in c] + + log.debug(f"Found {len(contributors)} git authors") + for git_author in contributors: + if git_author not in authors: + authors.append(git_author) + except AttributeError: + log.debug("Could not find git authors") + for author in authors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) From 3ba54c48279ee27235f82a179b4f66fdacf5fdc3 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:31:24 +0100 Subject: [PATCH 47/65] fix filename for CI --- .github/workflows/create-test-lint-wf-template.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index d8df2f6905..345326729c 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -112,6 +112,8 @@ jobs: run: | cd create-test-lint-wf nf-core --log-file log.txt pipelines create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml template_skip_${{ matrix.TEMPLATE }}.yml + # fake ro-crate + touch nf-core-testpipeline.crate.json - name: run the pipeline run: | From 9a66883f96fafc2d970a999ba078e8e255e6bf4a Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 13:42:51 +0100 Subject: [PATCH 48/65] better help hint message --- .github/workflows/create-test-lint-wf-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 345326729c..33352fe896 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -113,7 +113,7 @@ jobs: cd create-test-lint-wf nf-core --log-file log.txt pipelines create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml template_skip_${{ matrix.TEMPLATE }}.yml # fake ro-crate - touch nf-core-testpipeline.crate.json + touch ro-crate-metadata.json - name: run the pipeline run: | From a6566176ccf182d8ccc84e2644ed9ada3fd02c88 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 14:22:37 +0100 Subject: [PATCH 49/65] fix class name --- nf_core/ro_crate.py | 2 +- tests/test_rocrate.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py index ae3f6357ca..0c3b486ac2 100644 --- a/nf_core/ro_crate.py +++ b/nf_core/ro_crate.py @@ -17,7 +17,7 @@ log = logging.getLogger(__name__) -class RoCrate: +class ROCrate: """ Class to generate an RO Crate for a pipeline diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index 3db1876c04..83911eab6e 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -54,16 +54,14 @@ def test_rocrate_creation(self): """Run the nf-core rocrate command""" # Run the command - self.rocrate_obj = nf_core.rocrate.RoCrate(self.test_pipeline_dir) - self.rocrate_obj.create_ro_crate( - self.test_pipeline_dir, metadata_fn=Path(self.test_pipeline_dir, "ro-crate-metadata.json") - ) + self.rocrate_obj = nf_core.rocrate.ROCrate(self.test_pipeline_dir) + self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) # Check that the crate was created self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists()) # Check that the entries in the crate are correct - crate = rocrate.rocrate.ROCrate(str(self.test_pipeline_dir)) + crate = rocrate.rocrate.ROCrate(self.test_pipeline_dir) entities = crate.get_entities() # Check if the correct entities are set: From fb0d091c6327a266148278d363eecea25a752aff Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 14:24:08 +0100 Subject: [PATCH 50/65] add correct parent directory to faked crate --- .github/workflows/create-test-lint-wf-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index 33352fe896..d00288fcb5 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ b/.github/workflows/create-test-lint-wf-template.yml @@ -113,7 +113,7 @@ jobs: cd create-test-lint-wf nf-core --log-file log.txt pipelines create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml template_skip_${{ matrix.TEMPLATE }}.yml # fake ro-crate - touch ro-crate-metadata.json + touch my-prefix-testpipeline/ro-crate-metadata.json - name: run the pipeline run: | From 2469697dcc166cd1a3da4bf5e402e316f87ee77e Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 25 Jan 2024 15:17:59 +0100 Subject: [PATCH 51/65] add empty ro-crate after sync --- .github/workflows/create-test-lint-wf-template.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/create-test-lint-wf-template.yml b/.github/workflows/create-test-lint-wf-template.yml index d00288fcb5..a59e440101 100644 --- a/.github/workflows/create-test-lint-wf-template.yml +++ 
b/.github/workflows/create-test-lint-wf-template.yml @@ -114,6 +114,7 @@ jobs: nf-core --log-file log.txt pipelines create -n testpipeline -d "This pipeline is for testing" -a "Testing McTestface" --template-yaml template_skip_${{ matrix.TEMPLATE }}.yml # fake ro-crate touch my-prefix-testpipeline/ro-crate-metadata.json + git commit -am "add ro-crate" - name: run the pipeline run: | From f521fa3e043ae1af9f340129dbd0405affd25226 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 14:01:16 +0200 Subject: [PATCH 52/65] fix tests --- nf_core/commands_pipelines.py | 21 +++ nf_core/pipelines/rocrate.py | 324 ++++++++++++++++++++++++++++++++++ tests/test_rocrate.py | 15 +- 3 files changed, 352 insertions(+), 8 deletions(-) create mode 100644 nf_core/pipelines/rocrate.py diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index 1186935e52..09a8ca5287 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -277,6 +277,27 @@ def pipelines_list(ctx, keywords, sort, json, show_archived): stdout.print(list_workflows(keywords, sort, json, show_archived)) +# nf-core pipelines ro-crate +def pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) -> None: + from nf_core.pipelines.rocrate import ROCrate + + if json_path is None and zip_path is None: + log.error("Either `--json_path` or `--zip_path` must be specified.") + sys.exit(1) + else: + pipeline_dir = Path(pipeline_dir) + if json_path is not None: + json_path = Path(json_path) + if zip_path is not None: + zip_path = Path(zip_path) + try: + rocrate_obj = ROCrate(pipeline_dir, pipeline_version) + rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) + except (UserWarning, LookupError, FileNotFoundError) as e: + log.error(e) + sys.exit(1) + + # nf-core pipelines sync def pipelines_sync(ctx, directory, from_branch, pull_request, github_repository, username, template_yaml, force_pr): """ diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py new file mode 100644 index 0000000000..0c3b486ac2 --- /dev/null +++ b/nf_core/pipelines/rocrate.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python +"""Code to deal with pipeline RO (Research Object) Crates""" + +import logging +import tempfile +from pathlib import Path +from typing import Union + +import requests +import rocrate.model.entity +import rocrate.rocrate +from git import GitCommandError, InvalidGitRepositoryError +from rocrate.model.person import Person + +from nf_core.utils import Pipeline + +log = logging.getLogger(__name__) + + +class ROCrate: + """ + Class to generate an RO Crate for a pipeline + + Args: + pipeline_dir (Path): Path to the pipeline directory + version (str): Version of the pipeline to use + + """ + + def __init__(self, pipeline_dir: Path, version=""): + from nf_core.utils import is_pipeline_directory, setup_requests_cachedir + + is_pipeline_directory(pipeline_dir) + self.pipeline_dir = pipeline_dir + self.version = version + self.crate: rocrate.rocrate.ROCrate + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() + + setup_requests_cachedir() + + def create_ro_crate( + self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None + ) -> None: + """ + Create an RO Crate for a pipeline + + Args: + outdir (Path): Path to the output directory + metadata_path (Path): Path to the metadata file + zip_path (Path): Path to the zip file + + """ + import os + + # Set input paths + try: + self.set_crate_paths(outdir) 
+ except OSError as e: + log.error(e) + sys.exit(1) + + # Change to the pipeline directory, because the RO Crate doesn't handle relative paths well + current_path = Path.cwd() + os.chdir(self.pipeline_dir) + + # Check that the checkout pipeline version is the same as the requested version + if self.version: + if self.version != self.pipeline_obj.nf_config.get("manifest.version"): + # using git checkout to get the requested version + log.info(f"Checking out pipeline version {self.version}") + try: + self.pipeline_obj.repo.git.checkout(self.version) + self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj._load() + except InvalidGitRepositoryError: + log.error(f"Could not find a git repository in {self.pipeline_dir}") + sys.exit(1) + except GitCommandError: + log.error(f"Could not checkout version {self.version}") + sys.exit(1) + + self.make_workflow_ro_crate() + + # Save just the JSON metadata file + if metadata_path is not None: + log.info(f"Saving metadata file '{metadata_path}'") + # Save the crate to a temporary directory + tmpdir = Path(tempfile.mkdtemp(), "wf") + self.crate.write(tmpdir) + # Now save just the JSON file + crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") + if metadata_path.name == "ro-crate-metadata.json": + crate_json_fn.rename(metadata_path) + else: + crate_json_fn.rename(metadata_path / "ro-crate-metadata.json") + + # Save the whole crate zip file + if zip_path is not None: + if zip_path.name == "ro-crate.crate.zip": + log.info(f"Saving zip file '{zip_path}'") + self.crate.write_zip(zip_path) + else: + log.info(f"Saving zip file '{zip_path}/ro-crate.crate.zip;") + self.crate.write_zip(zip_path / "ro-crate.crate.zip") + + # Change back to the original directory + os.chdir(current_path) + + def make_workflow_ro_crate(self) -> None: + """ + Create an RO Crate for a pipeline + """ + if self.pipeline_obj is None: + raise ValueError("Pipeline object not loaded") + + # Create the RO Crate object + self.crate = rocrate.rocrate.ROCrate() + + # Set language type + programming_language = rocrate.model.entity.Entity( + self.crate, + "#nextflow", + properties={ + "@type": ["ComputerLanguage", "SoftwareApplication"], + "name": "Nextflow", + "url": "https://www.nextflow.io/", + "identifier": "https://www.nextflow.io/", + "version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), + }, + ) + self.crate.add(programming_language) + + # Conform to RO-Crate 1.1 and workflowhub-ro-crate + self.crate.update_jsonld( + { + "@id": "ro-crate-metadata.json", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate/1.1"}, + {"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}, + ], + } + ) + + # Set main entity file + self.set_main_entity("main.nf") + + # add readme as description + readme = Path("README.md") + + try: + self.crate.description = readme.read_text() + except FileNotFoundError: + log.error(f"Could not find README.md in {self.pipeline_dir}") + # get license from LICENSE file + license_file = Path("LICENSE") + try: + license = license_file.read_text() + if license.startswith("MIT"): + self.crate.license = "MIT" + else: + # prompt for license + log.info("Could not determine license from LICENSE file") + self.crate.license = input("Please enter the license for this pipeline: ") + except FileNotFoundError: + log.error(f"Could not find LICENSE file in {self.pipeline_dir}") + + # add doi as identifier + self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' + + if "dev" in 
self.pipeline_obj.nf_config.get("manifest.version", ""): + self.crate.CreativeWorkStatus = "InProgress" + else: + self.crate.CreativeWorkStatus = "Stable" + + # Add all other files + self.add_workflow_files() + + def set_main_entity(self, main_entity_filename: str): + """ + Set the main.nf as the main entity of the crate and add necessary metadata + """ + + wf_file = self.crate.add_jsonld( + { + "@id": main_entity_filename, + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + }, + ) + self.crate.mainEntity = wf_file + self.add_main_authors(wf_file) + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) + # get keywords from nf-core website + remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # go through all remote workflows and find the one that matches the pipeline name + topics = ["nf-core", "nextflow"] + for remote_wf in remote_workflows: + if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + topics = topics + remote_wf["topics"] + break + + log.debug(f"Adding topics: {topics}") + wf_file.append_to("keywords", topics) + + def add_main_authors(self, wf_file): + """ + Add workflow authors to the crate + """ + # add author entity to crate + + try: + authors = self.pipeline_obj.nf_config["manifest.author"].split(",") + # remove spaces + authors = [a.strip() for a in authors] + except KeyError: + log.error("No author field found in manifest of nextflow.config") + return + # look at git contributors for author names + try: + contributors = set() + + commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) + + for commit in commits_touching_path: + contributors.add(commit.author.name) + # exclude bots + contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] + # remove usernames (just keep names with spaces) + contributors = [c for c in contributors if " " in c] + + log.debug(f"Found {len(contributors)} git authors") + for git_author in contributors: + if git_author not in authors: + authors.append(git_author) + except AttributeError: + log.debug("Could not find git authors") + + for author in authors: + log.debug(f"Adding author: {author}") + orcid = get_orcid(author) + author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) + wf_file.append_to("author", author_entitity) + + def add_workflow_files(self): + """ + Add workflow files to the RO Crate + """ + import nf_core.utils + + wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) + # exclude github action files + wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] + log.debug(f"Adding {len(wf_filenames)} workflow files") + for fn in wf_filenames: + # skip main.nf + if fn == "main.nf": + continue + # add nextflow language to .nf and .config files + if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + continue + if fn.endswith(".png"): + log.debug(f"Adding workflow image file: {fn}") + self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) + if "metro_map" in fn: + log.info(f"Setting main entity image to: {fn}") + self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) + continue + if fn.endswith(".md"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + continue + else: + 
log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn) + continue + + def set_crate_paths(self, path: Path) -> None: + """Given a pipeline name, directory, or path, set wf_crate_filename""" + + path = Path(path) + + if path.is_dir(): + self.pipeline_dir = path + # wf_crate_filename = path / "ro-crate-metadata.json" + elif path.is_file(): + self.pipeline_dir = path.parent + # wf_crate_filename = path + + # Check that the schema file exists + if self.pipeline_dir is None: + raise OSError(f"Could not find pipeline '{path}'") + + +def get_orcid(name: str) -> Union[str, None]: + """ + Get the ORCID for a given name + + Args: + name (str): Name of the author + + Returns: + str: ORCID URI or None + """ + base_url = "https://pub.orcid.org/v3.0/search/" + headers = { + "Accept": "application/json", + } + params = {"q": f'family-name:"{name.split()[-1]}" AND given-names:"{name.split()[0]}"'} + response = requests.get(base_url, params=params, headers=headers) + + if response.status_code == 200: + json_response = response.json() + if json_response.get("num-found") == 1: + orcid_uri = json_response.get("result")[0].get("orcid-identifier", {}).get("uri") + log.info(f"Using found ORCID for {name}. Please double-check: {orcid_uri}") + return orcid_uri + else: + log.debug(f"No exact ORCID found for {name}. See {response.url}") + return None + else: + log.info(f"API request to ORCID unsuccessful. Status code: {response.status_code}") + return None diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index 83911eab6e..fe3cf2e8e0 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -1,5 +1,4 @@ -""" Test the nf-core rocrate command """ - +"""Test the nf-core pipelines rocrate command""" import shutil import tempfile @@ -9,8 +8,9 @@ import rocrate.rocrate from git import Repo -import nf_core.create -import nf_core.rocrate +import nf_core.pipelines.create +import nf_core.pipelines.create.create +import nf_core.pipelines.rocrate import nf_core.utils @@ -25,15 +25,14 @@ def setUp(self): self.tmp_dir = Path(tempfile.mkdtemp()) self.test_pipeline_dir = Path(self.tmp_dir, "nf-core-testpipeline") - self.create_obj = nf_core.create.PipelineCreate( + self.create_obj = nf_core.pipelines.create.create.PipelineCreate( name="testpipeline", description="This is a test pipeline", author="Test McTestFace", - outdir=self.test_pipeline_dir, + outdir=str(self.test_pipeline_dir), version="1.0.0", no_git=False, force=True, - plain=True, ) self.create_obj.init_pipeline() @@ -54,7 +53,7 @@ def test_rocrate_creation(self): """Run the nf-core rocrate command""" # Run the command - self.rocrate_obj = nf_core.rocrate.ROCrate(self.test_pipeline_dir) + self.rocrate_obj = nf_core.pipelines.rocrate.ROCrate(self.test_pipeline_dir) self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir)) # Check that the crate was created From 797fefbc05292ed0f268c9d7f7783823a69b3645 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 14:04:51 +0200 Subject: [PATCH 53/65] fix ci test --- .github/actions/create-lint-wf/action.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/actions/create-lint-wf/action.yml b/.github/actions/create-lint-wf/action.yml index ecd0eef873..dc24546146 100644 --- a/.github/actions/create-lint-wf/action.yml +++ b/.github/actions/create-lint-wf/action.yml @@ -65,6 +65,12 @@ runs: run: find nf-core-testpipeline -type f -exec sed -i 's/zenodo.XXXXXX/zenodo.123456/g' {} \; working-directory: create-lint-wf + # Add empty ro-crate file + 
- name: add empty ro-crate file + shell: bash + run: touch nf-core-testpipeline/ro-crate-metadata.json + working-directory: create-lint-wf + # Run nf-core pipelines linting - name: nf-core pipelines lint shell: bash From 9d5251f622904457f24df005baec48e056db3938 Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 17:54:22 +0200 Subject: [PATCH 54/65] use github to guess author name, set names as ids if no orcid --- nf_core/__main__.py | 31 ++++++++++++++++++++++++++++ nf_core/pipelines/rocrate.py | 40 +++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 08589fc242..d16c3a4e5b 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -569,6 +569,37 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): pipelines_list(ctx, keywords, sort, json, show_archived) +# nf-core pipelines ro-crate +@pipelines.command("ro-crate") +@click.argument( + "pipeline_dir", + type=click.Path(exists=True), + default=Path.cwd(), + required=True, + metavar="", +) +@click.option( + "-j", + "--json_path", + default=Path.cwd(), + type=str, + help="Path to save RO Crate metadata json file to", +) +@click.option("-z", "--zip_path", type=str, help="Path to save RO Crate zip file to") +@click.option( + "-pv", + "--pipeline_version", + type=str, + help="Version of pipeline to use for RO Crate", +) +@click.pass_context +def ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version): + """ + Make an Research Object Crate + """ + pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) + + # nf-core pipelines sync @pipelines.command("sync") @click.pass_context diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 0c3b486ac2..525c2287b5 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -3,8 +3,10 @@ import logging import tempfile +from datetime import datetime from pathlib import Path -from typing import Union +from typing import Set, Union, cast +from urllib.parse import quote import requests import rocrate.model.entity @@ -202,7 +204,7 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") wf_file.append_to("keywords", topics) - def add_main_authors(self, wf_file): + def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate """ @@ -217,34 +219,45 @@ def add_main_authors(self, wf_file): return # look at git contributors for author names try: - contributors = set() + contributors: Set[str] = set() commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: - contributors.add(commit.author.name) + if commit.author.name is not None: + contributors.add(commit.author.name) # exclude bots - contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] - # remove usernames (just keep names with spaces) - contributors = [c for c in contributors if " " in c] + contributors = {c for c in contributors if not c.endswith("bot") and c != "Travis CI User"} log.debug(f"Found {len(contributors)} git authors") for git_author in contributors: + git_author = requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) + if git_author is None: + log.debug(f"Could not find name for {git_author}") + continue + if git_author not in authors: authors.append(git_author) except AttributeError: log.debug("Could not find git authors") + # 
remove usernames (just keep names with spaces) + authors = [c for c in authors if " " in c] + for author in authors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) - author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) - wf_file.append_to("author", author_entitity) + author_entitity = self.crate.add( + Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) + ) + wf_file.append_to("creator", author_entitity) def add_workflow_files(self): """ Add workflow files to the RO Crate """ + import re + import nf_core.utils wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) @@ -263,8 +276,15 @@ def add_workflow_files(self): if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) - if "metro_map" in fn: + if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: log.info(f"Setting main entity image to: {fn}") + # check if image is set in main entity + if self.crate.mainEntity.get("image"): + log.info( + f"Main entity already has an image: {self.crate.mainEntity.get('image')}, replacing it with: {fn}" + ) + else: + log.info(f"Setting main entity image to: {fn}") self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): From a605ae69c27850be28dac5dccdf57dfab3ae8b1b Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 31 Jul 2024 17:55:47 +0200 Subject: [PATCH 55/65] add bioschemas, datecreated and datemodified (only set to current time atm) --- nf_core/pipelines/rocrate.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 525c2287b5..2723d62434 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -23,13 +23,16 @@ class ROCrate: """ Class to generate an RO Crate for a pipeline - Args: - pipeline_dir (Path): Path to the pipeline directory - version (str): Version of the pipeline to use - """ - def __init__(self, pipeline_dir: Path, version=""): + def __init__(self, pipeline_dir: Path, version="") -> None: + """ + Initialise the ROCrate object + + Args: + pipeline_dir (Path): Path to the pipeline directory + version (str): Version of the pipeline to checkout + """ from nf_core.utils import is_pipeline_directory, setup_requests_cachedir is_pipeline_directory(pipeline_dir) @@ -144,9 +147,6 @@ def make_workflow_ro_crate(self) -> None: } ) - # Set main entity file - self.set_main_entity("main.nf") - # add readme as description readme = Path("README.md") @@ -175,6 +175,9 @@ def make_workflow_ro_crate(self) -> None: else: self.crate.CreativeWorkStatus = "Stable" + # Set main entity file + self.set_main_entity("main.nf") + # Add all other files self.add_workflow_files() @@ -182,16 +185,21 @@ def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_jsonld( { "@id": main_entity_filename, "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], }, - ) + ) # FIXME: this adds "#main.nf" to the crate, but it should be "main.nf" + wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually self.crate.mainEntity = wf_file self.add_main_authors(wf_file) wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) + wf_file.append_to("dct:conformsTo", 
"https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") + # add dateCreated and dateModified, based on the current data + wf_file.append_to("dateCreated", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + # get keywords from nf-core website remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] # go through all remote workflows and find the one that matches the pipeline name From 6c6b8c923c13f899d3cd14a5cb09e58e82c67c6a Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 07:58:47 +0200 Subject: [PATCH 56/65] follow `ComputationalWorkflow` schema for main entitty. --- nf_core/__main__.py | 9 ++- nf_core/commands_pipelines.py | 9 ++- nf_core/pipelines/rocrate.py | 135 +++++++++++++++++++++++++--------- 3 files changed, 116 insertions(+), 37 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index d16c3a4e5b..13366a4b8b 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -591,9 +591,16 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): "--pipeline_version", type=str, help="Version of pipeline to use for RO Crate", + default="", ) @click.pass_context -def ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version): +def ro_crate( + ctx, + pipeline_dir: str, + json_path: str, + zip_path: str, + pipeline_version: str, +): """ Make an Research Object Crate """ diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index 09a8ca5287..a0fbe0838a 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -2,6 +2,7 @@ import os import sys from pathlib import Path +from typing import Optional, Union import rich @@ -278,7 +279,13 @@ def pipelines_list(ctx, keywords, sort, json, show_archived): # nf-core pipelines ro-crate -def pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) -> None: +def pipelines_ro_crate( + ctx, + pipeline_dir: Union[str, Path], + json_path: Optional[Union[str, Path]], + zip_path: Optional[Union[str, Path]], + pipeline_version: str, +) -> None: from nf_core.pipelines.rocrate import ROCrate if json_path is None and zip_path is None: diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 2723d62434..8ab3584189 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -2,18 +2,22 @@ """Code to deal with pipeline RO (Research Object) Crates""" import logging +import os +import sys import tempfile from datetime import datetime from pathlib import Path -from typing import Set, Union, cast +from typing import Dict, List, Optional, Set, Union, cast from urllib.parse import quote import requests import rocrate.model.entity import rocrate.rocrate from git import GitCommandError, InvalidGitRepositoryError +from rich.progress import BarColumn, Progress from rocrate.model.person import Person +from nf_core.pipelines.schema import PipelineSchema from nf_core.utils import Pipeline log = logging.getLogger(__name__) @@ -37,10 +41,14 @@ def __init__(self, pipeline_dir: Path, version="") -> None: is_pipeline_directory(pipeline_dir) self.pipeline_dir = pipeline_dir - self.version = version + self.version: str = version self.crate: rocrate.rocrate.ROCrate self.pipeline_obj = Pipeline(str(self.pipeline_dir)) self.pipeline_obj._load() + self.pipeline_obj.schema_obj = PipelineSchema() + # Assume we're in a pipeline dir root if schema path not set + 
self.pipeline_obj.schema_obj.get_schema_path(self.pipeline_dir) + self.pipeline_obj.schema_obj.load_schema() setup_requests_cachedir() @@ -70,7 +78,7 @@ def create_ro_crate( os.chdir(self.pipeline_dir) # Check that the checkout pipeline version is the same as the requested version - if self.version: + if self.version != "": if self.version != self.pipeline_obj.nf_config.get("manifest.version"): # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") @@ -84,7 +92,7 @@ def create_ro_crate( except GitCommandError: log.error(f"Could not checkout version {self.version}") sys.exit(1) - + self.version = self.pipeline_obj.nf_config.get("manifest.version", "") self.make_workflow_ro_crate() # Save just the JSON metadata file @@ -167,38 +175,67 @@ def make_workflow_ro_crate(self) -> None: except FileNotFoundError: log.error(f"Could not find LICENSE file in {self.pipeline_dir}") - # add doi as identifier - self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' + self.crate.name = self.pipeline_obj.nf_config.get("manifest.name") + + self.crate.root_dataset.append_to("version", self.version, compact=True) - if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): + if "dev" in self.version: self.crate.CreativeWorkStatus = "InProgress" else: self.crate.CreativeWorkStatus = "Stable" + tags = self.pipeline_obj.repo.tags + if tags: + # get the tag for this version + for tag in tags: + if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: + self.crate.root_dataset.append_to( + "dateCreated", tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), compact=True + ) - # Set main entity file - self.set_main_entity("main.nf") + self.crate.add_jsonld( + {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} + ) # Add all other files self.add_workflow_files() + # Set main entity file + self.set_main_entity("main.nf") + def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_jsonld( - { - "@id": main_entity_filename, - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - }, - ) # FIXME: this adds "#main.nf" to the crate, but it should be "main.nf" + wf_file = self.crate.add_file( + main_entity_filename, + properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + ) wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - self.crate.mainEntity = wf_file - self.add_main_authors(wf_file) + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + wf_file.append_to("dateCreated", self.crate.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + if self.pipeline_obj.schema_obj is not None: + log.debug("input value") + + schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + "input" + ] + input_value: Dict[str, Union[str, List[str], bool]] = { + "@type": ["PropertyValueSpecification", 
"FormalParameter"], + "default": schema_input.get("default", ""), + "encodingFormat": schema_input.get("mimetype", ""), + "valueRequired": "input" + in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + } + wf_file.append_to( + "input", + input_value, + ) # get keywords from nf-core website remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] @@ -212,6 +249,18 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") wf_file.append_to("keywords", topics) + self.add_main_authors(wf_file) + + self.crate.mainEntity = wf_file + + wf_file.append_to("license", self.crate.license) + wf_file.append_to("name", self.crate.name) + + self.crate.add_file( + main_entity_filename, + properties=wf_file.properties(), + ) + def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate @@ -222,43 +271,61 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: authors = self.pipeline_obj.nf_config["manifest.author"].split(",") # remove spaces authors = [a.strip() for a in authors] + # add manifest authors as maintainer to crate + except KeyError: log.error("No author field found in manifest of nextflow.config") return # look at git contributors for author names try: - contributors: Set[str] = set() + git_contributors: Set[str] = set() commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: if commit.author.name is not None: - contributors.add(commit.author.name) + git_contributors.add(commit.author.name) # exclude bots - contributors = {c for c in contributors if not c.endswith("bot") and c != "Travis CI User"} + contributors = {c for c in git_contributors if not c.endswith("bot") and c != "Travis CI User"} log.debug(f"Found {len(contributors)} git authors") - for git_author in contributors: - git_author = requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) - if git_author is None: - log.debug(f"Could not find name for {git_author}") - continue - - if git_author not in authors: - authors.append(git_author) + + progress_bar = Progress( + "[bold blue]{task.description}", + BarColumn(bar_width=None), + "[magenta]{task.completed} of {task.total}[reset] » [bold yellow]{task.fields[test_name]}", + transient=True, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with progress_bar: + bump_progress = progress_bar.add_task( + "Searching for author names on GitHub", total=len(contributors), test_name="" + ) + + for git_author in contributors: + progress_bar.update(bump_progress, advance=1, test_name=git_author) + git_author = ( + requests.get(f"https://api.github.com/users/{git_author}").json().get("name", git_author) + ) + if git_author is None: + log.debug(f"Could not find name for {git_author}") + continue + except AttributeError: - log.debug("Could not find git authors") + log.debug("Could not find git contributors") # remove usernames (just keep names with spaces) - authors = [c for c in authors if " " in c] + named_contributors = {c for c in contributors if " " in c} - for author in authors: + for author in named_contributors: log.debug(f"Adding author: {author}") orcid = get_orcid(author) author_entitity = self.crate.add( Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) ) 
wf_file.append_to("creator", author_entitity) + if author in authors: + wf_file.append_to("maintainer", author_entitity) def add_workflow_files(self): """ @@ -307,8 +374,6 @@ def add_workflow_files(self): def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" - path = Path(path) - if path.is_dir(): self.pipeline_dir = path # wf_crate_filename = path / "ro-crate-metadata.json" @@ -321,7 +386,7 @@ def set_crate_paths(self, path: Path) -> None: raise OSError(f"Could not find pipeline '{path}'") -def get_orcid(name: str) -> Union[str, None]: +def get_orcid(name: str) -> Optional[str]: """ Get the ORCID for a given name From c2f837caeec6c6e9b6fecbd7a7edecbd245551ec Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 13:59:10 +0200 Subject: [PATCH 57/65] fix date created field --- nf_core/pipelines/rocrate.py | 17 ++++------------- tests/test_rocrate.py | 4 ++-- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 8ab3584189..88b73e2122 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -97,9 +97,9 @@ def create_ro_crate( # Save just the JSON metadata file if metadata_path is not None: - log.info(f"Saving metadata file '{metadata_path}'") + log.info(f"Saving metadata file to '{metadata_path}'") # Save the crate to a temporary directory - tmpdir = Path(tempfile.mkdtemp(), "wf") + tmpdir = Path(tempfile.TemporaryDirectory().name) self.crate.write(tmpdir) # Now save just the JSON file crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") @@ -198,7 +198,6 @@ def make_workflow_ro_crate(self) -> None: # Add all other files self.add_workflow_files() - # Set main entity file self.set_main_entity("main.nf") @@ -215,7 +214,7 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", self.crate.get("dateCreated", ""), compact=True) + wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) if self.pipeline_obj.schema_obj is not None: @@ -256,11 +255,6 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("license", self.crate.license) wf_file.append_to("name", self.crate.name) - self.crate.add_file( - main_entity_filename, - properties=wf_file.properties(), - ) - def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate @@ -337,12 +331,9 @@ def add_workflow_files(self): wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) # exclude github action files - wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] + wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/") and not fn == "main.nf"] log.debug(f"Adding {len(wf_filenames)} workflow files") for fn in wf_filenames: - # skip main.nf - if fn == "main.nf": - continue # add nextflow language to .nf and .config files if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): log.debug(f"Adding workflow file: {fn}") diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py index 
fe3cf2e8e0..eac61fac48 100644 --- a/tests/test_rocrate.py +++ b/tests/test_rocrate.py @@ -67,8 +67,8 @@ def test_rocrate_creation(self): for entity in entities: entity_json = entity.as_jsonld() if entity_json["@id"] == "./": - self.assertEqual(entity_json.get("name"), "Research Object Crate for nf-core/testpipeline") - self.assertEqual(entity_json["mainEntity"], {"@id": "#main.nf"}) + self.assertEqual(entity_json.get("name"), "nf-core/testpipeline") + self.assertEqual(entity_json["mainEntity"], {"@id": "main.nf"}) elif entity_json["@id"] == "#main.nf": self.assertEqual(entity_json["programmingLanguage"], [{"@id": "#nextflow"}]) self.assertEqual(entity_json["image"], [{"@id": "nf-core-testpipeline_metro_map.png"}]) From 0778b2ac953da54a87413d033f0d69175dbc3643 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 14:43:12 +0200 Subject: [PATCH 58/65] add "about" field to workflow diagram --- nf_core/pipelines/rocrate.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 88b73e2122..c09667c110 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -196,10 +196,10 @@ def make_workflow_ro_crate(self) -> None: {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} ) - # Add all other files - self.add_workflow_files() # Set main entity file self.set_main_entity("main.nf") + # Add all other files + self.add_workflow_files() def set_main_entity(self, main_entity_filename: str): """ @@ -341,7 +341,7 @@ def add_workflow_files(self): continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") - self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) + self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: log.info(f"Setting main entity image to: {fn}") # check if image is set in main entity @@ -351,6 +351,7 @@ def add_workflow_files(self): ) else: log.info(f"Setting main entity image to: {fn}") + self.crate.update_jsonld({"@id": "#" + fn, "about": {"@id": self.crate.mainEntity.id}}) self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): From ccfd6d94b7aceb27084939794f70875048d51c01 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 15:58:16 +0200 Subject: [PATCH 59/65] add input as separate entity --- nf_core/pipelines/rocrate.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index c09667c110..3c508e7be0 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -217,6 +217,11 @@ def set_main_entity(self, main_entity_filename: str): wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + if self.version.endswith("dev"): + url = "dev" + else: + url = self.version + wf_file.append_to("url", {"@id": f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/"}) if self.pipeline_obj.schema_obj is not None: log.debug("input value") @@ -224,6 +229,7 @@ def set_main_entity(self, main_entity_filename: str): "input" ] input_value: Dict[str, Union[str, List[str], bool]] = { + "@id": "#input", "@type": 
["PropertyValueSpecification", "FormalParameter"], "default": schema_input.get("default", ""), "encodingFormat": schema_input.get("mimetype", ""), @@ -231,9 +237,10 @@ def set_main_entity(self, main_entity_filename: str): in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", } + self.crate.add_jsonld(input_value) wf_file.append_to( "input", - input_value, + {"@id": "#input"}, ) # get keywords from nf-core website @@ -335,10 +342,10 @@ def add_workflow_files(self): log.debug(f"Adding {len(wf_filenames)} workflow files") for fn in wf_filenames: # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - continue + # if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test") and not fn.endswith("main.nf"): + # log.debug(f"Adding workflow file: {fn}") + # self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + # continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) @@ -358,10 +365,10 @@ def add_workflow_files(self): log.debug(f"Adding workflow file: {fn}") self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) continue - else: - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn) - continue + # else: + # log.debug(f"Adding workflow file: {fn}") + # self.crate.add_file(fn) + # continue def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" From 776c6cca3b1eac19010df5a5631979a327e5e6c1 Mon Sep 17 00:00:00 2001 From: mashehu Date: Mon, 5 Aug 2024 16:33:50 +0200 Subject: [PATCH 60/65] add version to main entity --- nf_core/pipelines/rocrate.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 3c508e7be0..d00178f83a 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -211,17 +211,20 @@ def set_main_entity(self, main_entity_filename: str): ) wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) - wf_file.append_to("dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/") + wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) + wf_file.append_to( + "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + ) # add dateCreated and dateModified, based on the current data wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}) + wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) if self.version.endswith("dev"): url = "dev" else: url = self.version - wf_file.append_to("url", {"@id": f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/"}) + wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) + wf_file.append_to("version", self.version, 
compact=True) if self.pipeline_obj.schema_obj is not None: log.debug("input value") @@ -350,7 +353,6 @@ def add_workflow_files(self): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: - log.info(f"Setting main entity image to: {fn}") # check if image is set in main entity if self.crate.mainEntity.get("image"): log.info( From a50f12f790894b0bd057c474a8b4f13e48ad501d Mon Sep 17 00:00:00 2001 From: mashehu Date: Fri, 16 Aug 2024 09:14:34 +0200 Subject: [PATCH 61/65] fix incorrect type --- nf_core/pipelines/rocrate.py | 154 +++++++++++++++++++---------------- nf_core/utils.py | 6 +- 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index d00178f83a..252f4b6a0d 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -5,9 +5,8 @@ import os import sys import tempfile -from datetime import datetime from pathlib import Path -from typing import Dict, List, Optional, Set, Union, cast +from typing import Optional, Set, Union from urllib.parse import quote import requests @@ -43,7 +42,7 @@ def __init__(self, pipeline_dir: Path, version="") -> None: self.pipeline_dir = pipeline_dir self.version: str = version self.crate: rocrate.rocrate.ROCrate - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj = Pipeline(self.pipeline_dir) self.pipeline_obj._load() self.pipeline_obj.schema_obj = PipelineSchema() # Assume we're in a pipeline dir root if schema path not set @@ -82,9 +81,12 @@ def create_ro_crate( if self.version != self.pipeline_obj.nf_config.get("manifest.version"): # using git checkout to get the requested version log.info(f"Checking out pipeline version {self.version}") + if self.pipeline_obj.repo is None: + log.error(f"Pipeline repository not found in {self.pipeline_dir}") + sys.exit(1) try: self.pipeline_obj.repo.git.checkout(self.version) - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) + self.pipeline_obj = Pipeline(self.pipeline_dir) self.pipeline_obj._load() except InvalidGitRepositoryError: log.error(f"Could not find a git repository in {self.pipeline_dir}") @@ -183,14 +185,19 @@ def make_workflow_ro_crate(self) -> None: self.crate.CreativeWorkStatus = "InProgress" else: self.crate.CreativeWorkStatus = "Stable" - tags = self.pipeline_obj.repo.tags - if tags: - # get the tag for this version - for tag in tags: - if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: - self.crate.root_dataset.append_to( - "dateCreated", tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), compact=True - ) + if self.pipeline_obj.repo is None: + log.error(f"Pipeline repository not found in {self.pipeline_dir}") + else: + tags = self.pipeline_obj.repo.tags + if tags: + # get the tag for this version + for tag in tags: + if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: + self.crate.root_dataset.append_to( + "dateCreated", + tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), + compact=True, + ) self.crate.add_jsonld( {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} @@ -205,65 +212,72 @@ def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - wf_file = self.crate.add_file( + self.crate.add_workflow( # sets @type and conformsTo according to Workflow RO-Crate spec 
main_entity_filename, - properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + dest_path=main_entity_filename, + main=True, + lang="nextflow", # adds the #nextflow entity automatically and connects it to programmingLanguage + lang_version="X.Y.Z", # sets version on #nextflow ) - wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - - wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) - wf_file.append_to( - "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True - ) - # add dateCreated and dateModified, based on the current data - wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) - wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) - if self.version.endswith("dev"): - url = "dev" - else: - url = self.version - wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) - wf_file.append_to("version", self.version, compact=True) - if self.pipeline_obj.schema_obj is not None: - log.debug("input value") - - schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ - "input" - ] - input_value: Dict[str, Union[str, List[str], bool]] = { - "@id": "#input", - "@type": ["PropertyValueSpecification", "FormalParameter"], - "default": schema_input.get("default", ""), - "encodingFormat": schema_input.get("mimetype", ""), - "valueRequired": "input" - in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], - "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", - } - self.crate.add_jsonld(input_value) - wf_file.append_to( - "input", - {"@id": "#input"}, - ) - - # get keywords from nf-core website - remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] - # go through all remote workflows and find the one that matches the pipeline name - topics = ["nf-core", "nextflow"] - for remote_wf in remote_workflows: - if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): - topics = topics + remote_wf["topics"] - break - - log.debug(f"Adding topics: {topics}") - wf_file.append_to("keywords", topics) - - self.add_main_authors(wf_file) - - self.crate.mainEntity = wf_file - - wf_file.append_to("license", self.crate.license) - wf_file.append_to("name", self.crate.name) + # wf_file = self.crate.add_file( + # main_entity_filename, + # properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, + # ) + # wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually + + # wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) + # wf_file.append_to( + # "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + # ) + # # add dateCreated and dateModified, based on the current data + # wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) + # wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) + # wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) + # if self.version.endswith("dev"): + # url = "dev" + # else: + # url = self.version + # 
wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) + # wf_file.append_to("version", self.version, compact=True) + # if self.pipeline_obj.schema_obj is not None: + # log.debug("input value") + + # schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + # "input" + # ] + # input_value: Dict[str, Union[str, List[str], bool]] = { + # "@id": "#input", + # "@type": ["PropertyValueSpecification", "FormalParameter"], + # "default": schema_input.get("default", ""), + # "encodingFormat": schema_input.get("mimetype", ""), + # "valueRequired": "input" + # in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + # "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + # } + # self.crate.add_jsonld(input_value) + # wf_file.append_to( + # "input", + # {"@id": "#input"}, + # ) + + # # get keywords from nf-core website + # remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # # go through all remote workflows and find the one that matches the pipeline name + # topics = ["nf-core", "nextflow"] + # for remote_wf in remote_workflows: + # if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + # topics = topics + remote_wf["topics"] + # break + + # log.debug(f"Adding topics: {topics}") + # wf_file.append_to("keywords", topics) + + # self.add_main_authors(wf_file) + + # self.crate.mainEntity = wf_file + + # wf_file.append_to("license", self.crate.license) + # wf_file.append_to("name", self.crate.name) def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ @@ -283,7 +297,7 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: # look at git contributors for author names try: git_contributors: Set[str] = set() - + assert self.pipeline_obj.repo is not None # mypy commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) for commit in commits_touching_path: diff --git a/nf_core/utils.py b/nf_core/utils.py index 74dfe40238..70183648f8 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -36,6 +36,7 @@ from rich.spinner import Spinner import nf_core +from nf_core.pipelines.schema import PipelineSchema log = logging.getLogger(__name__) @@ -166,7 +167,8 @@ def __init__(self, wf_path: Path) -> None: self.wf_path = Path(wf_path) self.pipeline_name: Optional[str] = None self.pipeline_prefix: Optional[str] = None - self.schema_obj: Optional[Dict] = None + self.schema_obj: Optional[PipelineSchema] = None + self.repo: Optional[git.Repo] = None try: repo = git.Repo(self.wf_path) @@ -1254,7 +1256,7 @@ def get_first_available_path(directory: Union[Path, str], paths: List[str]) -> U return None -def sort_dictionary(d): +def sort_dictionary(d: Dict) -> Dict: """Sorts a nested dictionary recursively""" result = {} for k, v in sorted(d.items()): From 49592cbb0c28655782aa7cd81c45f0d6b54ac307 Mon Sep 17 00:00:00 2001 From: mashehu Date: Fri, 16 Aug 2024 12:15:51 +0200 Subject: [PATCH 62/65] switch to add_workflow method to add main entity and add components as datasets with descriptions --- nf_core/pipelines/rocrate.py | 192 +++++++++++++++++++---------------- nf_core/utils.py | 10 +- 2 files changed, 113 insertions(+), 89 deletions(-) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index 252f4b6a0d..f488f54f50 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -5,8 +5,9 @@ 
import os import sys import tempfile +from datetime import datetime from pathlib import Path -from typing import Optional, Set, Union +from typing import Dict, List, Optional, Set, Union from urllib.parse import quote import requests @@ -132,20 +133,6 @@ def make_workflow_ro_crate(self) -> None: # Create the RO Crate object self.crate = rocrate.rocrate.ROCrate() - # Set language type - programming_language = rocrate.model.entity.Entity( - self.crate, - "#nextflow", - properties={ - "@type": ["ComputerLanguage", "SoftwareApplication"], - "name": "Nextflow", - "url": "https://www.nextflow.io/", - "identifier": "https://www.nextflow.io/", - "version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), - }, - ) - self.crate.add(programming_language) - # Conform to RO-Crate 1.1 and workflowhub-ro-crate self.crate.update_jsonld( { @@ -217,67 +204,66 @@ def set_main_entity(self, main_entity_filename: str): dest_path=main_entity_filename, main=True, lang="nextflow", # adds the #nextflow entity automatically and connects it to programmingLanguage - lang_version="X.Y.Z", # sets version on #nextflow + lang_version=self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), + ) + + self.crate.mainEntity.append_to( + "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True + ) + # add dateCreated and dateModified, based on the current data + self.crate.mainEntity.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) + self.crate.mainEntity.append_to( + "dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True ) - # wf_file = self.crate.add_file( - # main_entity_filename, - # properties={"@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"]}, - # ) - # wf_file = cast(rocrate.model.entity.Entity, wf_file) # ro-crate is untyped so need to cast type manually - - # wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}, compact=True) - # wf_file.append_to( - # "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True - # ) - # # add dateCreated and dateModified, based on the current data - # wf_file.append_to("dateCreated", self.crate.root_dataset.get("dateCreated", ""), compact=True) - # wf_file.append_to("dateModified", str(datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")), compact=True) - # wf_file.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) - # if self.version.endswith("dev"): - # url = "dev" - # else: - # url = self.version - # wf_file.append_to("url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True) - # wf_file.append_to("version", self.version, compact=True) - # if self.pipeline_obj.schema_obj is not None: - # log.debug("input value") - - # schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ - # "input" - # ] - # input_value: Dict[str, Union[str, List[str], bool]] = { - # "@id": "#input", - # "@type": ["PropertyValueSpecification", "FormalParameter"], - # "default": schema_input.get("default", ""), - # "encodingFormat": schema_input.get("mimetype", ""), - # "valueRequired": "input" - # in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], - # "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", - # } - # self.crate.add_jsonld(input_value) - # wf_file.append_to( - # "input", - # {"@id": "#input"}, - # ) - - # # get keywords from nf-core website - # 
remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] - # # go through all remote workflows and find the one that matches the pipeline name - # topics = ["nf-core", "nextflow"] - # for remote_wf in remote_workflows: - # if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): - # topics = topics + remote_wf["topics"] - # break - - # log.debug(f"Adding topics: {topics}") - # wf_file.append_to("keywords", topics) - - # self.add_main_authors(wf_file) - - # self.crate.mainEntity = wf_file - - # wf_file.append_to("license", self.crate.license) - # wf_file.append_to("name", self.crate.name) + self.crate.mainEntity.append_to("sdPublisher", {"@id": "https://nf-co.re/"}, compact=True) + if self.version.endswith("dev"): + url = "dev" + else: + url = self.version + self.crate.mainEntity.append_to( + "url", f"https://nf-co.re/{self.crate.name.replace('nf-core/','')}/{url}/", compact=True + ) + self.crate.mainEntity.append_to("version", self.version, compact=True) + if self.pipeline_obj.schema_obj is not None: + log.debug("input value") + + schema_input = self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["properties"][ + "input" + ] + input_value: Dict[str, Union[str, List[str], bool]] = { + "@id": "#input", + "@type": ["PropertyValueSpecification", "FormalParameter"], + "default": schema_input.get("default", ""), + "encodingFormat": schema_input.get("mimetype", ""), + "valueRequired": "input" + in self.pipeline_obj.schema_obj.schema["definitions"]["input_output_options"]["required"], + "dct:conformsTo": "https://bioschemas.org/types/FormalParameter/1.0-RELEASE", + } + self.crate.add_jsonld(input_value) + self.crate.mainEntity.append_to( + "input", + {"@id": "#input"}, + ) + + # get keywords from nf-core website + remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] + # go through all remote workflows and find the one that matches the pipeline name + topics = ["nf-core", "nextflow"] + for remote_wf in remote_workflows: + assert self.pipeline_obj.pipeline_name is not None # mypy + if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): + topics = topics + remote_wf["topics"] + break + + log.debug(f"Adding topics: {topics}") + self.crate.mainEntity.append_to("keywords", topics) + + # self.add_main_authors(self.crate.mainEntity) + + self.crate.mainEntity = self.crate.mainEntity + + self.crate.mainEntity.append_to("license", self.crate.license) + self.crate.mainEntity.append_to("name", self.crate.name) def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ @@ -357,12 +343,48 @@ def add_workflow_files(self): # exclude github action files wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/") and not fn == "main.nf"] log.debug(f"Adding {len(wf_filenames)} workflow files") + # find all main.nf files inside modules/nf-core and subworkflows/nf-core + component_files = [ + fn + for fn in wf_filenames + if ((fn.startswith("modules/nf-core") or fn.startswith("subworkflows/nf-core")) and fn.endswith("main.nf")) + ] + + wf_dirs = [str(Path(fn).parent) for fn in component_files] + for wf_dir in wf_dirs: + if Path(wf_dir).exists(): + log.debug(f"Adding workflow directory: {wf_dir}") + component_type = wf_dir.split("/")[0] + component_name = wf_dir.replace(component_type + "/nf-core/", "").replace("/", "_") + self.crate.add_directory( + wf_dir, + dest_path=wf_dir, + properties={ + "description": f"nf-core {component_type} 
[{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." + }, + ) + wf_locals = [ + str(Path(fn).parent) + for fn in wf_filenames + if fn.startswith("modules/local") or fn.startswith("subworkflows/local") and fn.endswith("main.nf") + ] + + for wf_dir in wf_locals: + log.debug(f"Adding workflow directory: {wf_dir}") + component_type = wf_dir.split("/")[0].rstrip("s") + component_name = wf_dir.replace(component_type + "/local/", "").replace("/", "_") + + self.crate.add_directory(wf_dir, dest_path=wf_dir, properties={"description": f"local {component_type}"}) + # go through all files that are not part of directories inside wf_dirs + wf_filenames = [ + fn for fn in wf_filenames if not any(fn.startswith(str(wf_dir)) for wf_dir in wf_dirs + wf_locals) + ] for fn in wf_filenames: # add nextflow language to .nf and .config files - # if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test") and not fn.endswith("main.nf"): - # log.debug(f"Adding workflow file: {fn}") - # self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - # continue + if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): + log.debug(f"Adding workflow file: {fn}") + self.crate.add_file(fn, dest_path=fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) + continue if fn.endswith(".png"): log.debug(f"Adding workflow image file: {fn}") self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) @@ -378,13 +400,13 @@ def add_workflow_files(self): self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) continue if fn.endswith(".md"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) + log.debug(f"Adding file: {fn}") + self.crate.add_file(fn, dest_path=fn, properties={"encodingFormat": "text/markdown"}) + continue + else: + log.debug(f"Adding file: {fn}") + self.crate.add_file(fn, dest_path=fn) continue - # else: - # log.debug(f"Adding workflow file: {fn}") - # self.crate.add_file(fn) - # continue def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" diff --git a/nf_core/utils.py b/nf_core/utils.py index 70183648f8..c9e9afb262 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -20,7 +20,7 @@ import time from contextlib import contextmanager from pathlib import Path -from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Tuple, Union import git import prompt_toolkit.styles @@ -36,7 +36,9 @@ from rich.spinner import Spinner import nf_core -from nf_core.pipelines.schema import PipelineSchema + +if TYPE_CHECKING: + from nf_core.pipelines.schema import PipelineSchema log = logging.getLogger(__name__) @@ -171,8 +173,8 @@ def __init__(self, wf_path: Path) -> None: self.repo: Optional[git.Repo] = None try: - repo = git.Repo(self.wf_path) - self.git_sha = repo.head.object.hexsha + self.repo = git.Repo(self.wf_path) + self.git_sha = self.repo.head.object.hexsha except Exception as e: log.debug(f"Could not find git hash for pipeline: {self.wf_path}. 
{e}") From c1d08d7ec5057056d28182b7e349b500a447eccc Mon Sep 17 00:00:00 2001 From: mashehu Date: Tue, 27 Aug 2024 15:55:40 +0200 Subject: [PATCH 63/65] ro_crate -> rocrate --- nf_core/__main__.py | 11 ++++++----- nf_core/commands_pipelines.py | 6 +++--- nf_core/pipelines/rocrate.py | 21 ++++++++++++--------- tests/test_rocrate.py | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 13366a4b8b..83493a99ef 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -35,6 +35,7 @@ pipelines_launch, pipelines_lint, pipelines_list, + pipelines_rocrate, pipelines_schema_build, pipelines_schema_docs, pipelines_schema_lint, @@ -85,7 +86,7 @@ }, { "name": "For developers", - "commands": ["create", "lint", "bump-version", "sync", "schema", "create-logo"], + "commands": ["create", "lint", "bump-version", "sync", "schema", "rocrate", "create-logo"], }, ], "nf-core modules": [ @@ -569,8 +570,8 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): pipelines_list(ctx, keywords, sort, json, show_archived) -# nf-core pipelines ro-crate -@pipelines.command("ro-crate") +# nf-core pipelines rocrate +@pipelines.command("rocrate") @click.argument( "pipeline_dir", type=click.Path(exists=True), @@ -594,7 +595,7 @@ def command_pipelines_list(ctx, keywords, sort, json, show_archived): default="", ) @click.pass_context -def ro_crate( +def rocrate( ctx, pipeline_dir: str, json_path: str, @@ -604,7 +605,7 @@ def ro_crate( """ Make an Research Object Crate """ - pipelines_ro_crate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) + pipelines_rocrate(ctx, pipeline_dir, json_path, zip_path, pipeline_version) # nf-core pipelines sync diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py index a0fbe0838a..35f4a870d3 100644 --- a/nf_core/commands_pipelines.py +++ b/nf_core/commands_pipelines.py @@ -278,8 +278,8 @@ def pipelines_list(ctx, keywords, sort, json, show_archived): stdout.print(list_workflows(keywords, sort, json, show_archived)) -# nf-core pipelines ro-crate -def pipelines_ro_crate( +# nf-core pipelines rocrate +def pipelines_rocrate( ctx, pipeline_dir: Union[str, Path], json_path: Optional[Union[str, Path]], @@ -299,7 +299,7 @@ def pipelines_ro_crate( zip_path = Path(zip_path) try: rocrate_obj = ROCrate(pipeline_dir, pipeline_version) - rocrate_obj.create_ro_crate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) + rocrate_obj.create_rocrate(pipeline_dir, metadata_path=json_path, zip_path=zip_path) except (UserWarning, LookupError, FileNotFoundError) as e: log.error(e) sys.exit(1) diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py index f488f54f50..ee6564e435 100644 --- a/nf_core/pipelines/rocrate.py +++ b/nf_core/pipelines/rocrate.py @@ -3,15 +3,14 @@ import logging import os +import re import sys import tempfile from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Set, Union -from urllib.parse import quote import requests -import rocrate.model.entity import rocrate.rocrate from git import GitCommandError, InvalidGitRepositoryError from rich.progress import BarColumn, Progress @@ -52,7 +51,7 @@ def __init__(self, pipeline_dir: Path, version="") -> None: setup_requests_cachedir() - def create_ro_crate( + def create_rocrate( self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None ) -> None: """ @@ -96,7 +95,7 @@ def create_ro_crate( log.error(f"Could not checkout version {self.version}") 
sys.exit(1) self.version = self.pipeline_obj.nf_config.get("manifest.version", "") - self.make_workflow_ro_crate() + self.make_workflow_rocrate() # Save just the JSON metadata file if metadata_path is not None: @@ -123,7 +122,7 @@ def create_ro_crate( # Change back to the original directory os.chdir(current_path) - def make_workflow_ro_crate(self) -> None: + def make_workflow_rocrate(self) -> None: """ Create an RO Crate for a pipeline """ @@ -258,7 +257,7 @@ def set_main_entity(self, main_entity_filename: str): log.debug(f"Adding topics: {topics}") self.crate.mainEntity.append_to("keywords", topics) - # self.add_main_authors(self.crate.mainEntity) + self.add_main_authors(self.crate.mainEntity) self.crate.mainEntity = self.crate.mainEntity @@ -323,9 +322,14 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: for author in named_contributors: log.debug(f"Adding author: {author}") + assert self.pipeline_obj.repo is not None # mypy + # get email from git log + email = self.pipeline_obj.repo.git.log(f"--author={author}", "--pretty=format:%ae", "-1") orcid = get_orcid(author) author_entitity = self.crate.add( - Person(self.crate, orcid if orcid is not None else "#" + quote(author), properties={"name": author}) + Person( + self.crate, orcid if orcid is not None else "#" + email, properties={"name": author, "email": email} + ) ) wf_file.append_to("creator", author_entitity) if author in authors: @@ -335,7 +339,6 @@ def add_workflow_files(self): """ Add workflow files to the RO Crate """ - import re import nf_core.utils @@ -360,7 +363,7 @@ def add_workflow_files(self): wf_dir, dest_path=wf_dir, properties={ - "description": f"nf-core {component_type} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." + "description": f"nf-core {re.sub('s$', '', component_type)} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." 
},
                 )
         wf_locals = [
diff --git a/tests/test_rocrate.py b/tests/test_rocrate.py
index eac61fac48..6defd5d5e8 100644
--- a/tests/test_rocrate.py
+++ b/tests/test_rocrate.py
@@ -54,7 +54,7 @@ def test_rocrate_creation(self):

         # Run the command
         self.rocrate_obj = nf_core.pipelines.rocrate.ROCrate(self.test_pipeline_dir)
-        self.rocrate_obj.create_ro_crate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir))
+        self.rocrate_obj.create_rocrate(self.test_pipeline_dir, metadata_path=Path(self.test_pipeline_dir))

         # Check that the crate was created
         self.assertTrue(Path(self.test_pipeline_dir, "ro-crate-metadata.json").exists())

From 5e4b4f399fa739daf8d237f0533295e89db759a6 Mon Sep 17 00:00:00 2001
From: mashehu
Date: Thu, 17 Oct 2024 17:21:27 +0200
Subject: [PATCH 64/65] use repo2rocrate to generate the main structure of the crate

---
 nf_core/__main__.py           |   1 +
 nf_core/commands_pipelines.py |   2 +-
 nf_core/pipelines/rocrate.py  | 237 +++++++++++++---------------------
 3 files changed, 92 insertions(+), 148 deletions(-)

diff --git a/nf_core/__main__.py b/nf_core/__main__.py
index 83493a99ef..69f8f04d0a 100644
--- a/nf_core/__main__.py
+++ b/nf_core/__main__.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import sys
+from pathlib import Path

 import rich
 import rich.console
diff --git a/nf_core/commands_pipelines.py b/nf_core/commands_pipelines.py
index 35f4a870d3..9699dc53a3 100644
--- a/nf_core/commands_pipelines.py
+++ b/nf_core/commands_pipelines.py
@@ -299,7 +299,7 @@ def pipelines_rocrate(
     zip_path = Path(zip_path)
     try:
         rocrate_obj = ROCrate(pipeline_dir, pipeline_version)
-        rocrate_obj.create_rocrate(pipeline_dir, metadata_path=json_path, zip_path=zip_path)
+        rocrate_obj.create_rocrate(pipeline_dir, json_path=json_path, zip_path=zip_path)
     except (UserWarning, LookupError, FileNotFoundError) as e:
         log.error(e)
         sys.exit(1)
diff --git a/nf_core/pipelines/rocrate.py b/nf_core/pipelines/rocrate.py
index ee6564e435..de00189a2c 100644
--- a/nf_core/pipelines/rocrate.py
+++ b/nf_core/pipelines/rocrate.py
@@ -5,7 +5,6 @@
 import os
 import re
 import sys
-import tempfile
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Set, Union
@@ -13,8 +12,10 @@
 import requests
 import rocrate.rocrate
 from git import GitCommandError, InvalidGitRepositoryError
+from repo2rocrate.nextflow import NextflowCrateBuilder
 from rich.progress import BarColumn, Progress
 from rocrate.model.person import Person
+from rocrate.rocrate import ROCrate as BaseROCrate

 from nf_core.pipelines.schema import PipelineSchema
 from nf_core.utils import Pipeline
@@ -22,6 +23,42 @@
 log = logging.getLogger(__name__)


+class CustomNextflowCrateBuilder(NextflowCrateBuilder):
+    DATA_ENTITIES = NextflowCrateBuilder.DATA_ENTITIES + [
+        ("docs/usage.md", "File", "Usage documentation"),
+        ("docs/output.md", "File", "Output documentation"),
+        ("subworkflows/local", "Dataset", "Pipeline-specific subworkflows"),
+        ("subworkflows/nf-core", "Dataset", "nf-core subworkflows"),
+        (".nf-core.yml", "File", "nf-core configuration file, configuring template features and linting rules"),
+        (".pre-commit-config.yaml", "File", "Configuration file for pre-commit hooks"),
+        (".prettierignore", "File", "Ignore file for prettier"),
+        (".prettierrc", "File", "Configuration file for prettier"),
+    ]
+
+
+def custom_make_crate(
+    root: Path,
+    workflow: Optional[Path] = None,
+    repo_url: Optional[str] = None,
+    wf_name: Optional[str] = None,
+    wf_version: Optional[str] = None,
+    lang_version: Optional[str] = None,
+    ci_workflow: 
Optional[str] = "ci.yml", + diagram: Optional[Path] = None, +) -> BaseROCrate: + builder = CustomNextflowCrateBuilder(root, repo_url=repo_url) + + return builder.build( + workflow, + wf_name=wf_name, + wf_version=wf_version, + lang_version=lang_version, + license=None, + ci_workflow=ci_workflow, + diagram=diagram, + ) + + class ROCrate: """ Class to generate an RO Crate for a pipeline @@ -52,19 +89,17 @@ def __init__(self, pipeline_dir: Path, version="") -> None: setup_requests_cachedir() def create_rocrate( - self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None + self, outdir: Path, json_path: Union[None, Path] = None, zip_path: Union[None, Path] = None ) -> None: """ Create an RO Crate for a pipeline Args: outdir (Path): Path to the output directory - metadata_path (Path): Path to the metadata file + json_path (Path): Path to the metadata file zip_path (Path): Path to the zip file """ - import os - # Set input paths try: self.set_crate_paths(outdir) @@ -73,8 +108,6 @@ def create_rocrate( sys.exit(1) # Change to the pipeline directory, because the RO Crate doesn't handle relative paths well - current_path = Path.cwd() - os.chdir(self.pipeline_dir) # Check that the checkout pipeline version is the same as the requested version if self.version != "": @@ -98,29 +131,19 @@ def create_rocrate( self.make_workflow_rocrate() # Save just the JSON metadata file - if metadata_path is not None: - log.info(f"Saving metadata file to '{metadata_path}'") - # Save the crate to a temporary directory - tmpdir = Path(tempfile.TemporaryDirectory().name) - self.crate.write(tmpdir) - # Now save just the JSON file - crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") - if metadata_path.name == "ro-crate-metadata.json": - crate_json_fn.rename(metadata_path) - else: - crate_json_fn.rename(metadata_path / "ro-crate-metadata.json") + if json_path is not None: + if json_path.name != "ro-crate-metadata.json": + json_path = json_path / "ro-crate-metadata.json" + + log.info(f"Saving metadata file to '{json_path}'") + self.crate.metadata.write(json_path) # Save the whole crate zip file if zip_path is not None: - if zip_path.name == "ro-crate.crate.zip": - log.info(f"Saving zip file '{zip_path}'") - self.crate.write_zip(zip_path) - else: - log.info(f"Saving zip file '{zip_path}/ro-crate.crate.zip;") - self.crate.write_zip(zip_path / "ro-crate.crate.zip") - - # Change back to the original directory - os.chdir(current_path) + if zip_path.name != "ro-crate.crate.zip": + zip_path = zip_path / "ro-crate.crate.zip" + log.info(f"Saving zip file '{zip_path}") + self.crate.write_zip(zip_path) def make_workflow_rocrate(self) -> None: """ @@ -129,18 +152,25 @@ def make_workflow_rocrate(self) -> None: if self.pipeline_obj is None: raise ValueError("Pipeline object not loaded") + diagram: Optional[Path] = None + # find files (metro|tube)_?(map)?.png in the pipeline directory or docs/ using pathlib + pattern = re.compile(r".*?(metro|tube|subway)_(map).*?\.png", re.IGNORECASE) + for file in self.pipeline_dir.rglob("*.png"): + if pattern.match(file.name): + log.debug(f"Found diagram: {file}") + diagram = file.relative_to(self.pipeline_dir) + break + # Create the RO Crate object - self.crate = rocrate.rocrate.ROCrate() - - # Conform to RO-Crate 1.1 and workflowhub-ro-crate - self.crate.update_jsonld( - { - "@id": "ro-crate-metadata.json", - "conformsTo": [ - {"@id": "https://w3id.org/ro/crate/1.1"}, - {"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}, - ], - } + + self.crate = 
custom_make_crate( + self.pipeline_dir, + self.pipeline_dir / "main.nf", + self.pipeline_obj.nf_config.get("manifest.homePage", ""), + self.pipeline_obj.nf_config.get("manifest.name", ""), + self.pipeline_obj.nf_config.get("manifest.version", ""), + self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), + diagram=diagram, ) # add readme as description @@ -163,48 +193,19 @@ def make_workflow_rocrate(self) -> None: except FileNotFoundError: log.error(f"Could not find LICENSE file in {self.pipeline_dir}") - self.crate.name = self.pipeline_obj.nf_config.get("manifest.name") - - self.crate.root_dataset.append_to("version", self.version, compact=True) - - if "dev" in self.version: - self.crate.CreativeWorkStatus = "InProgress" - else: - self.crate.CreativeWorkStatus = "Stable" - if self.pipeline_obj.repo is None: - log.error(f"Pipeline repository not found in {self.pipeline_dir}") - else: - tags = self.pipeline_obj.repo.tags - if tags: - # get the tag for this version - for tag in tags: - if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: - self.crate.root_dataset.append_to( - "dateCreated", - tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), - compact=True, - ) - self.crate.add_jsonld( {"@id": "https://nf-co.re/", "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/"} ) - # Set main entity file + # Set metadata for main entity file self.set_main_entity("main.nf") - # Add all other files - self.add_workflow_files() def set_main_entity(self, main_entity_filename: str): """ Set the main.nf as the main entity of the crate and add necessary metadata """ - self.crate.add_workflow( # sets @type and conformsTo according to Workflow RO-Crate spec - main_entity_filename, - dest_path=main_entity_filename, - main=True, - lang="nextflow", # adds the #nextflow entity automatically and connects it to programmingLanguage - lang_version=self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), - ) + if self.crate.mainEntity is None: + raise ValueError("Main entity not set") self.crate.mainEntity.append_to( "dct:conformsTo", "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", compact=True @@ -231,7 +232,7 @@ def set_main_entity(self, main_entity_filename: str): ] input_value: Dict[str, Union[str, List[str], bool]] = { "@id": "#input", - "@type": ["PropertyValueSpecification", "FormalParameter"], + "@type": ["FormalParameter"], "default": schema_input.get("default", ""), "encodingFormat": schema_input.get("mimetype", ""), "valueRequired": "input" @@ -264,6 +265,24 @@ def set_main_entity(self, main_entity_filename: str): self.crate.mainEntity.append_to("license", self.crate.license) self.crate.mainEntity.append_to("name", self.crate.name) + if "dev" in self.version: + self.crate.creativeWorkStatus = "InProgress" + else: + self.crate.creativeWorkStatus = "Stable" + if self.pipeline_obj.repo is None: + log.error(f"Pipeline repository not found in {self.pipeline_dir}") + else: + tags = self.pipeline_obj.repo.tags + if tags: + # get the tag for this version + for tag in tags: + if tag.commit.hexsha == self.pipeline_obj.repo.head.commit.hexsha: + self.crate.mainEntity.append_to( + "dateCreated", + tag.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), + compact=True, + ) + def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: """ Add workflow authors to the crate @@ -335,82 +354,6 @@ def add_main_authors(self, wf_file: rocrate.model.entity.Entity) -> None: if author in authors: wf_file.append_to("maintainer", 
author_entitity) - def add_workflow_files(self): - """ - Add workflow files to the RO Crate - """ - - import nf_core.utils - - wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) - # exclude github action files - wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/") and not fn == "main.nf"] - log.debug(f"Adding {len(wf_filenames)} workflow files") - # find all main.nf files inside modules/nf-core and subworkflows/nf-core - component_files = [ - fn - for fn in wf_filenames - if ((fn.startswith("modules/nf-core") or fn.startswith("subworkflows/nf-core")) and fn.endswith("main.nf")) - ] - - wf_dirs = [str(Path(fn).parent) for fn in component_files] - for wf_dir in wf_dirs: - if Path(wf_dir).exists(): - log.debug(f"Adding workflow directory: {wf_dir}") - component_type = wf_dir.split("/")[0] - component_name = wf_dir.replace(component_type + "/nf-core/", "").replace("/", "_") - self.crate.add_directory( - wf_dir, - dest_path=wf_dir, - properties={ - "description": f"nf-core {re.sub('s$', '', component_type)} [{component_name}](https://nf-co.re/{component_type}/{component_name}) installed from the [nf-core/modules repository](https://github.com/nf-core/modules/)." - }, - ) - wf_locals = [ - str(Path(fn).parent) - for fn in wf_filenames - if fn.startswith("modules/local") or fn.startswith("subworkflows/local") and fn.endswith("main.nf") - ] - - for wf_dir in wf_locals: - log.debug(f"Adding workflow directory: {wf_dir}") - component_type = wf_dir.split("/")[0].rstrip("s") - component_name = wf_dir.replace(component_type + "/local/", "").replace("/", "_") - - self.crate.add_directory(wf_dir, dest_path=wf_dir, properties={"description": f"local {component_type}"}) - # go through all files that are not part of directories inside wf_dirs - wf_filenames = [ - fn for fn in wf_filenames if not any(fn.startswith(str(wf_dir)) for wf_dir in wf_dirs + wf_locals) - ] - for fn in wf_filenames: - # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, dest_path=fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - continue - if fn.endswith(".png"): - log.debug(f"Adding workflow image file: {fn}") - self.crate.add_jsonld({"@id": fn, "@type": ["File", "ImageObject"]}) - if re.search(r"(metro|tube)_?(map)?", fn) and self.crate.mainEntity is not None: - # check if image is set in main entity - if self.crate.mainEntity.get("image"): - log.info( - f"Main entity already has an image: {self.crate.mainEntity.get('image')}, replacing it with: {fn}" - ) - else: - log.info(f"Setting main entity image to: {fn}") - self.crate.update_jsonld({"@id": "#" + fn, "about": {"@id": self.crate.mainEntity.id}}) - self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) - continue - if fn.endswith(".md"): - log.debug(f"Adding file: {fn}") - self.crate.add_file(fn, dest_path=fn, properties={"encodingFormat": "text/markdown"}) - continue - else: - log.debug(f"Adding file: {fn}") - self.crate.add_file(fn, dest_path=fn) - continue - def set_crate_paths(self, path: Path) -> None: """Given a pipeline name, directory, or path, set wf_crate_filename""" From ad7b895b28dcfe879ecb444865ca04e76a02b31c Mon Sep 17 00:00:00 2001 From: mashehu Date: Wed, 23 Oct 2024 12:32:14 +0200 Subject: [PATCH 65/65] remove outdated file --- nf_core/ro_crate.py | 324 -------------------------------------------- 1 file changed, 324 deletions(-) delete mode 100644 nf_core/ro_crate.py 
diff --git a/nf_core/ro_crate.py b/nf_core/ro_crate.py deleted file mode 100644 index 0c3b486ac2..0000000000 --- a/nf_core/ro_crate.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python -"""Code to deal with pipeline RO (Research Object) Crates""" - -import logging -import tempfile -from pathlib import Path -from typing import Union - -import requests -import rocrate.model.entity -import rocrate.rocrate -from git import GitCommandError, InvalidGitRepositoryError -from rocrate.model.person import Person - -from nf_core.utils import Pipeline - -log = logging.getLogger(__name__) - - -class ROCrate: - """ - Class to generate an RO Crate for a pipeline - - Args: - pipeline_dir (Path): Path to the pipeline directory - version (str): Version of the pipeline to use - - """ - - def __init__(self, pipeline_dir: Path, version=""): - from nf_core.utils import is_pipeline_directory, setup_requests_cachedir - - is_pipeline_directory(pipeline_dir) - self.pipeline_dir = pipeline_dir - self.version = version - self.crate: rocrate.rocrate.ROCrate - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) - self.pipeline_obj._load() - - setup_requests_cachedir() - - def create_ro_crate( - self, outdir: Path, metadata_path: Union[None, Path] = None, zip_path: Union[None, Path] = None - ) -> None: - """ - Create an RO Crate for a pipeline - - Args: - outdir (Path): Path to the output directory - metadata_path (Path): Path to the metadata file - zip_path (Path): Path to the zip file - - """ - import os - - # Set input paths - try: - self.set_crate_paths(outdir) - except OSError as e: - log.error(e) - sys.exit(1) - - # Change to the pipeline directory, because the RO Crate doesn't handle relative paths well - current_path = Path.cwd() - os.chdir(self.pipeline_dir) - - # Check that the checkout pipeline version is the same as the requested version - if self.version: - if self.version != self.pipeline_obj.nf_config.get("manifest.version"): - # using git checkout to get the requested version - log.info(f"Checking out pipeline version {self.version}") - try: - self.pipeline_obj.repo.git.checkout(self.version) - self.pipeline_obj = Pipeline(str(self.pipeline_dir)) - self.pipeline_obj._load() - except InvalidGitRepositoryError: - log.error(f"Could not find a git repository in {self.pipeline_dir}") - sys.exit(1) - except GitCommandError: - log.error(f"Could not checkout version {self.version}") - sys.exit(1) - - self.make_workflow_ro_crate() - - # Save just the JSON metadata file - if metadata_path is not None: - log.info(f"Saving metadata file '{metadata_path}'") - # Save the crate to a temporary directory - tmpdir = Path(tempfile.mkdtemp(), "wf") - self.crate.write(tmpdir) - # Now save just the JSON file - crate_json_fn = Path(tmpdir, "ro-crate-metadata.json") - if metadata_path.name == "ro-crate-metadata.json": - crate_json_fn.rename(metadata_path) - else: - crate_json_fn.rename(metadata_path / "ro-crate-metadata.json") - - # Save the whole crate zip file - if zip_path is not None: - if zip_path.name == "ro-crate.crate.zip": - log.info(f"Saving zip file '{zip_path}'") - self.crate.write_zip(zip_path) - else: - log.info(f"Saving zip file '{zip_path}/ro-crate.crate.zip;") - self.crate.write_zip(zip_path / "ro-crate.crate.zip") - - # Change back to the original directory - os.chdir(current_path) - - def make_workflow_ro_crate(self) -> None: - """ - Create an RO Crate for a pipeline - """ - if self.pipeline_obj is None: - raise ValueError("Pipeline object not loaded") - - # Create the RO Crate object - self.crate = 
rocrate.rocrate.ROCrate() - - # Set language type - programming_language = rocrate.model.entity.Entity( - self.crate, - "#nextflow", - properties={ - "@type": ["ComputerLanguage", "SoftwareApplication"], - "name": "Nextflow", - "url": "https://www.nextflow.io/", - "identifier": "https://www.nextflow.io/", - "version": self.pipeline_obj.nf_config.get("manifest.nextflowVersion", ""), - }, - ) - self.crate.add(programming_language) - - # Conform to RO-Crate 1.1 and workflowhub-ro-crate - self.crate.update_jsonld( - { - "@id": "ro-crate-metadata.json", - "conformsTo": [ - {"@id": "https://w3id.org/ro/crate/1.1"}, - {"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}, - ], - } - ) - - # Set main entity file - self.set_main_entity("main.nf") - - # add readme as description - readme = Path("README.md") - - try: - self.crate.description = readme.read_text() - except FileNotFoundError: - log.error(f"Could not find README.md in {self.pipeline_dir}") - # get license from LICENSE file - license_file = Path("LICENSE") - try: - license = license_file.read_text() - if license.startswith("MIT"): - self.crate.license = "MIT" - else: - # prompt for license - log.info("Could not determine license from LICENSE file") - self.crate.license = input("Please enter the license for this pipeline: ") - except FileNotFoundError: - log.error(f"Could not find LICENSE file in {self.pipeline_dir}") - - # add doi as identifier - self.crate.name = f'Research Object Crate for {self.pipeline_obj.nf_config.get("manifest.name")}' - - if "dev" in self.pipeline_obj.nf_config.get("manifest.version", ""): - self.crate.CreativeWorkStatus = "InProgress" - else: - self.crate.CreativeWorkStatus = "Stable" - - # Add all other files - self.add_workflow_files() - - def set_main_entity(self, main_entity_filename: str): - """ - Set the main.nf as the main entity of the crate and add necessary metadata - """ - - wf_file = self.crate.add_jsonld( - { - "@id": main_entity_filename, - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], - }, - ) - self.crate.mainEntity = wf_file - self.add_main_authors(wf_file) - wf_file.append_to("programmingLanguage", {"@id": "#nextflow"}) - # get keywords from nf-core website - remote_workflows = requests.get("https://nf-co.re/pipelines.json").json()["remote_workflows"] - # go through all remote workflows and find the one that matches the pipeline name - topics = ["nf-core", "nextflow"] - for remote_wf in remote_workflows: - if remote_wf["name"] == self.pipeline_obj.pipeline_name.replace("nf-core/", ""): - topics = topics + remote_wf["topics"] - break - - log.debug(f"Adding topics: {topics}") - wf_file.append_to("keywords", topics) - - def add_main_authors(self, wf_file): - """ - Add workflow authors to the crate - """ - # add author entity to crate - - try: - authors = self.pipeline_obj.nf_config["manifest.author"].split(",") - # remove spaces - authors = [a.strip() for a in authors] - except KeyError: - log.error("No author field found in manifest of nextflow.config") - return - # look at git contributors for author names - try: - contributors = set() - - commits_touching_path = list(self.pipeline_obj.repo.iter_commits(paths="main.nf")) - - for commit in commits_touching_path: - contributors.add(commit.author.name) - # exclude bots - contributors = [c for c in contributors if not c.endswith("bot") or c != "Travis CI User"] - # remove usernames (just keep names with spaces) - contributors = [c for c in contributors if " " in c] - - log.debug(f"Found {len(contributors)} git authors") - 
for git_author in contributors: - if git_author not in authors: - authors.append(git_author) - except AttributeError: - log.debug("Could not find git authors") - - for author in authors: - log.debug(f"Adding author: {author}") - orcid = get_orcid(author) - author_entitity = self.crate.add(Person(self.crate, orcid, properties={"name": author})) - wf_file.append_to("author", author_entitity) - - def add_workflow_files(self): - """ - Add workflow files to the RO Crate - """ - import nf_core.utils - - wf_filenames = nf_core.utils.get_wf_files(Path.cwd()) - # exclude github action files - wf_filenames = [fn for fn in wf_filenames if not fn.startswith(".github/")] - log.debug(f"Adding {len(wf_filenames)} workflow files") - for fn in wf_filenames: - # skip main.nf - if fn == "main.nf": - continue - # add nextflow language to .nf and .config files - if fn.endswith(".nf") or fn.endswith(".config") or fn.endswith(".nf.test"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"programmingLanguage": {"@id": "#nextflow"}}) - continue - if fn.endswith(".png"): - log.debug(f"Adding workflow image file: {fn}") - self.crate.add_jsonld({"@id": Path(fn).name, "@type": ["File", "ImageObject"]}) - if "metro_map" in fn: - log.info(f"Setting main entity image to: {fn}") - self.crate.mainEntity.append_to("image", {"@id": Path(fn).name}) - continue - if fn.endswith(".md"): - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn, properties={"encodingFormat": "text/markdown"}) - continue - else: - log.debug(f"Adding workflow file: {fn}") - self.crate.add_file(fn) - continue - - def set_crate_paths(self, path: Path) -> None: - """Given a pipeline name, directory, or path, set wf_crate_filename""" - - path = Path(path) - - if path.is_dir(): - self.pipeline_dir = path - # wf_crate_filename = path / "ro-crate-metadata.json" - elif path.is_file(): - self.pipeline_dir = path.parent - # wf_crate_filename = path - - # Check that the schema file exists - if self.pipeline_dir is None: - raise OSError(f"Could not find pipeline '{path}'") - - -def get_orcid(name: str) -> Union[str, None]: - """ - Get the ORCID for a given name - - Args: - name (str): Name of the author - - Returns: - str: ORCID URI or None - """ - base_url = "https://pub.orcid.org/v3.0/search/" - headers = { - "Accept": "application/json", - } - params = {"q": f'family-name:"{name.split()[-1]}" AND given-names:"{name.split()[0]}"'} - response = requests.get(base_url, params=params, headers=headers) - - if response.status_code == 200: - json_response = response.json() - if json_response.get("num-found") == 1: - orcid_uri = json_response.get("result")[0].get("orcid-identifier", {}).get("uri") - log.info(f"Using found ORCID for {name}. Please double-check: {orcid_uri}") - return orcid_uri - else: - log.debug(f"No exact ORCID found for {name}. See {response.url}") - return None - else: - log.info(f"API request to ORCID unsuccessful. Status code: {response.status_code}") - return None
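
For reference, a minimal sketch of how the reworked `ROCrate` class left by this series (in `nf_core/pipelines/rocrate.py`) can be driven directly from Python, independent of the `nf-core pipelines rocrate` CLI wrapper. The class and method names are taken from the final patches above; the pipeline path is a hypothetical placeholder and must point at a real pipeline checkout (the constructor validates the directory and loads the pipeline before anything is written):

    from pathlib import Path

    from nf_core.pipelines.rocrate import ROCrate

    # Hypothetical local checkout of an nf-core pipeline - adjust to a real directory.
    pipeline_dir = Path("/path/to/nf-core-testpipeline")

    # Build the crate for whatever revision is currently checked out;
    # passing a released tag as `version` makes the class check out that tag first.
    rocrate_obj = ROCrate(pipeline_dir, version="")

    # Write both outputs next to the pipeline: a directory argument is expanded to
    # <dir>/ro-crate-metadata.json and <dir>/ro-crate.crate.zip respectively.
    rocrate_obj.create_rocrate(pipeline_dir, json_path=pipeline_dir, zip_path=pipeline_dir)

Passing only one of `json_path` or `zip_path` writes just that artefact, mirroring the optional CLI flags added in the earlier patches of the series.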