Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve testing coverage for meta_yml #2539

Merged
merged 7 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
"""
The NFCoreComponent class holds information and utility functions for a single module or subworkflow
"""
import logging
import re
from pathlib import Path

log = logging.getLogger(__name__)


class NFCoreComponent:
"""
Expand Down Expand Up @@ -44,16 +48,16 @@ def __init__(

if remote_component:
# Initialize the important files
self.main_nf = self.component_dir / "main.nf"
self.meta_yml = self.component_dir / "meta.yml"
self.main_nf = Path(self.component_dir, "main.nf")
self.meta_yml = Path(self.component_dir, "meta.yml")
self.process_name = ""
self.environment_yml = self.component_dir / "environment.yml"
self.environment_yml = Path(self.component_dir, "environment.yml")

repo_dir = self.component_dir.parts[: self.component_dir.parts.index(self.component_name.split("/")[0])][-1]
self.org = repo_dir
self.nftest_testdir = self.component_dir / "tests"
self.nftest_main_nf = self.nftest_testdir / "main.nf.test"
self.tags_yml = self.nftest_testdir / "tags.yml"
self.nftest_testdir = Path(self.component_dir, "tests")
self.nftest_main_nf = Path(self.nftest_testdir, "main.nf.test")
self.tags_yml = Path(self.nftest_testdir, "tags.yml")

if self.repo_type == "pipeline":
patch_fn = f"{self.component_name.replace('/', '-')}.diff"
Expand Down Expand Up @@ -90,3 +94,47 @@ def _get_included_components(self, main_nf: str):
if line.strip().startswith("include"):
included_components.append(line.strip().split()[-1].split(self.org)[-1].split("main")[0].strip("/"))
return included_components

def get_inputs_from_main_nf(self):
"""Collect all inputs from the main.nf file."""
inputs = []
with open(self.main_nf, "r") as f:
data = f.read()
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
mashehu marked this conversation as resolved.
Show resolved Hide resolved
return inputs
input_data = data.split("input:")[1].split("output:")[0]
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s]+))"
mashehu marked this conversation as resolved.
Show resolved Hide resolved
matches = re.finditer(regex, input_data, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
if match.group(3):
inputs.append(match.group(3))
elif match.group(4):
inputs.append(match.group(4))
log.info(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs

def get_outputs_from_main_nf(self):
outputs = []
with open(self.main_nf, "r") as f:
data = f.read()
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.info(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex = r"emit:\s*([^)\s]+)"
mashehu marked this conversation as resolved.
Show resolved Hide resolved
matches = re.finditer(regex, output_data, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
outputs.append(match.group(1))
log.info(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
2 changes: 1 addition & 1 deletion nf_core/modules/bump_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
self.tools_config: Dict[str, Any] = {}

def bump_versions(
self, module: Union[NFCoreComponent, None] = None, all_modules: bool = False, show_uptodate: bool = False
self, module: Union[str, None] = None, all_modules: bool = False, show_uptodate: bool = False
) -> None:
"""
Bump the container and conda version of single module or all modules
Expand Down
90 changes: 80 additions & 10 deletions nf_core/modules/lint/meta_yml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,24 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None
and module input is consistent between the
``meta.yml`` and the ``main.nf``.

If the module has inputs or outputs, they are expected to be
formatted as:

..code-block::
tuple val(foo) path(bar)
val foo
path foo

or permutations of the above.

Args:
module_lint_object (ComponentLint): The lint object for the module
module (NFCoreComponent): The module to lint

"""

module.get_inputs_from_main_nf()
module.get_outputs_from_main_nf()
# Check if we have a patch file, get original file in that case
meta_yaml = None
if module.is_patched:
Expand All @@ -45,14 +62,14 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None
return

# Confirm that the meta.yml file is valid according to the JSON schema
valid_meta_yml = True
valid_meta_yml = False
try:
with open(Path(module_lint_object.modules_repo.local_repo_dir, "modules/meta-schema.json"), "r") as fh:
schema = json.load(fh)
validators.validate(instance=meta_yaml, schema=schema)
module.passed.append(("meta_yml_valid", "Module `meta.yml` is valid", module.meta_yml))
valid_meta_yml = True
except exceptions.ValidationError as e:
valid_meta_yml = False
hint = ""
if len(e.path) > 0:
hint = f"\nCheck the entry for `{e.path[0]}`."
Expand All @@ -79,26 +96,79 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None
meta_input = [list(x.keys())[0] for x in meta_yaml["input"]]
for input in module.inputs:
if input in meta_input:
module.passed.append(("meta_input", f"`{input}` specified", module.meta_yml))
module.passed.append(("meta_input_main_only", f"`{input}` specified", module.meta_yml))
else:
module.warned.append(
(
"meta_input_main_only",
f"`{input}` is present as an input in the `main.nf`, but missing in `meta.yml`",
module.meta_yml,
)
)
# check if there are any inputs in meta.yml that are not in main.nf
for input in meta_input:
if input in module.inputs:
module.passed.append(
(
"meta_input_meta_only",
f"`{input}` is present as an input in `meta.yml` and `main.nf`",
module.meta_yml,
)
)
else:
module.failed.append(("meta_input", f"`{input}` missing in `meta.yml`", module.meta_yml))
module.warned.append(
(
"meta_input_meta_only",
f"`{input}` is present as an input in `meta.yml` but not in `main.nf`",
module.meta_yml,
)
)

if "output" in meta_yaml:
if "output" in meta_yaml and meta_yaml["output"] is not None:
meta_output = [list(x.keys())[0] for x in meta_yaml["output"]]
for output in module.outputs:
if output in meta_output:
module.passed.append(("meta_output", f"`{output}` specified", module.meta_yml))
module.passed.append(("meta_output_main_only", f"`{output}` specified", module.meta_yml))
else:
module.failed.append(("meta_output", f"`{output}` missing in `meta.yml`", module.meta_yml))

module.warned.append(
(
"meta_output_main_only",
f"`{output}` is present as an output in the `main.nf`, but missing in `meta.yml`",
module.meta_yml,
)
)
# check if there are any outputs in meta.yml that are not in main.nf
for output in meta_output:
if output in module.outputs:
module.passed.append(
(
"meta_output_meta_only",
f"`{output}` is present as an output in `meta.yml` and `main.nf`",
module.meta_yml,
)
)
else:
module.warned.append(
(
"meta_output_meta_only",
f"`{output}` is present as an output in `meta.yml` but not in `main.nf`",
module.meta_yml,
)
)
# confirm that the name matches the process name in main.nf
if meta_yaml["name"].upper() == module.process_name:
module.passed.append(("meta_name", "Correct name specified in `meta.yml`", module.meta_yml))
module.passed.append(
(
"meta_name",
"Correct name specified in `meta.yml`.",
module.meta_yml,
)
)
else:
module.failed.append(
(
"meta_name",
f"Conflicting process name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
module.meta_yml,
)
)
10 changes: 5 additions & 5 deletions nf_core/modules/modules_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def repo_full_name_from_remote(remote_url: str) -> str:
return path


def get_installed_modules(dir: str, repo_type="modules") -> Tuple[List[str], List[str]]:
def get_installed_modules(dir: str, repo_type="modules") -> Tuple[List[str], List[NFCoreComponent]]:
"""
Make a list of all modules installed in this repository

Expand All @@ -52,7 +52,7 @@ def get_installed_modules(dir: str, repo_type="modules") -> Tuple[List[str], Lis
"""
# initialize lists
local_modules: List[str] = []
nfcore_modules: List[str] = []
nfcore_modules_names: List[str] = []
local_modules_dir: Optional[str] = None
nfcore_modules_dir = os.path.join(dir, "modules", "nf-core")

Expand All @@ -76,9 +76,9 @@ def get_installed_modules(dir: str, repo_type="modules") -> Tuple[List[str], Lis
# Not a module, but contains sub-modules
if not "main.nf" in m_content:
for tool in m_content:
nfcore_modules.append(os.path.join(m, tool))
nfcore_modules_names.append(os.path.join(m, tool))
else:
nfcore_modules.append(m)
nfcore_modules_names.append(m)

# Make full (relative) file paths and create NFCoreComponent objects
if local_modules_dir:
Expand All @@ -93,7 +93,7 @@ def get_installed_modules(dir: str, repo_type="modules") -> Tuple[List[str], Lis
base_dir=Path(dir),
component_type="modules",
)
for m in nfcore_modules
for m in nfcore_modules_names
]

return local_modules, nfcore_modules
91 changes: 91 additions & 0 deletions tests/modules/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,97 @@ def test_modules_environment_yml_file_name_mismatch(self):
assert module_lint.failed[0].lint_test == "environment_yml_name"


def test_modules_meta_yml_incorrect_licence_field(self):
"""Test linting a module with an incorrect Licence field in meta.yml"""
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml")) as fh:
meta_yml = yaml.safe_load(fh)
meta_yml["tools"][0]["bpipe"]["licence"] = "[MIT]"
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml"), "w") as fh:
fh.write(yaml.dump(meta_yml))
module_lint = nf_core.modules.ModuleLint(dir=self.nfcore_modules)
module_lint.lint(print_results=False, module="bpipe/test")

# reset changes
meta_yml["tools"][0]["bpipe"]["licence"] = ["MIT"]
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml"), "w") as fh:
fh.write(yaml.dump(meta_yml))

assert len(module_lint.failed) == 1, f"Linting failed with {[x.__dict__ for x in module_lint.failed]}"
assert len(module_lint.passed) >= 0
assert len(module_lint.warned) >= 0
assert module_lint.failed[0].lint_test == "meta_yml_valid"


def test_modules_meta_yml_input_mismatch(self):
"""Test linting a module with an extra entry in input fields in meta.yml compared to module.input"""
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf")) as fh:
main_nf = fh.read()
main_nf_new = main_nf.replace("path bam", "path bai")
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf"), "w") as fh:
fh.write(main_nf_new)
module_lint = nf_core.modules.ModuleLint(dir=self.nfcore_modules)
module_lint.lint(print_results=False, module="bpipe/test")
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf"), "w") as fh:
fh.write(main_nf)
assert len(module_lint.failed) == 0, f"Linting failed with {[x.__dict__ for x in module_lint.failed]}"
assert len(module_lint.passed) >= 0
assert len(module_lint.warned) == 2
lint_tests = [x.lint_test for x in module_lint.warned]
# check that it is there twice:
assert lint_tests.count("meta_input_meta_only") == 1
assert lint_tests.count("meta_input_main_only") == 1


def test_modules_meta_yml_output_mismatch(self):
"""Test linting a module with an extra entry in output fields in meta.yml compared to module.output"""
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf")) as fh:
main_nf = fh.read()
main_nf_new = main_nf.replace("emit: bam", "emit: bai")
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf"), "w") as fh:
fh.write(main_nf_new)
module_lint = nf_core.modules.ModuleLint(dir=self.nfcore_modules)
module_lint.lint(print_results=False, module="bpipe/test")
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "main.nf"), "w") as fh:
fh.write(main_nf)
assert len(module_lint.failed) == 0, f"Linting failed with {[x.__dict__ for x in module_lint.failed]}"
assert len(module_lint.passed) >= 0
assert len(module_lint.warned) == 2
lint_tests = [x.lint_test for x in module_lint.warned]
# check that it is there twice:
assert lint_tests.count("meta_output_meta_only") == 1
assert lint_tests.count("meta_output_main_only") == 1


def test_modules_meta_yml_incorrect_name(self):
"""Test linting a module with an incorrect name in meta.yml"""
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml")) as fh:
meta_yml = yaml.safe_load(fh)
meta_yml["name"] = "bpipe/test"
# need to make the same change to the environment.yml file
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "environment.yml")) as fh:
environment_yml = yaml.safe_load(fh)
environment_yml["name"] = "bpipe/test"
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml"), "w") as fh:
fh.write(yaml.dump(meta_yml))
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "environment.yml"), "w") as fh:
fh.write(yaml.dump(environment_yml))
module_lint = nf_core.modules.ModuleLint(dir=self.nfcore_modules)
module_lint.lint(print_results=False, module="bpipe/test")

# reset changes
meta_yml["name"] = "bpipe_test"
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "meta.yml"), "w") as fh:
fh.write(yaml.dump(meta_yml))
environment_yml["name"] = "bpipe_test"
with open(Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "environment.yml"), "w") as fh:
fh.write(yaml.dump(environment_yml))

assert len(module_lint.failed) == 1, f"Linting failed with {[x.__dict__ for x in module_lint.failed]}"
assert len(module_lint.passed) >= 0
assert len(module_lint.warned) >= 0
assert module_lint.failed[0].lint_test == "meta_name"


def test_modules_missing_test_dir(self):
"""Test linting a module with a missing test directory"""
Path(self.nfcore_modules, "modules", "nf-core", "bpipe", "test", "tests").rename(
Expand Down
Loading
Loading