Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update fastqc to produce multi-version versions.yml #665

Merged
merged 19 commits into from
Sep 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Closes #XXX <!-- If this PR fixes an issue, please link it here! -->
- [ ] If you've added a new tool - have you followed the module conventions in the [contribution docs](https://github.com/nf-core/modules/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, include test data in your PR.
- [ ] Remove all TODO statements.
- [ ] Emit the `<SOFTWARE>.version.txt` file.
- [ ] Emit the `versions.yml` file.
- [ ] Follow the naming conventions.
- [ ] Follow the parameters requirements.
- [ ] Follow the input/output options guidelines.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
# Test the module
- name: Run pytest-workflow
# only use one thread for pytest-workflow to avoid race condition on conda cache.
run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof
run: NF_CORE_MODULES_TEST=1 TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof

- name: Upload logs on failure
if: failure()
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@ output/
*.code-workspace
.screenrc
.*.sw?
__pycache__
*.pyo
*.pyc
tests/data/
54 changes: 32 additions & 22 deletions modules/fastqc/functions.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
}

//
// Extract name of module from process name using $task.process
//
def getProcessName(task_process) {
return task_process.tokenize(':')[-1]
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
Expand Down Expand Up @@ -37,32 +44,35 @@ def getPathFromList(path_list) {
// Function to save/publish module results
//
def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}
17 changes: 12 additions & 5 deletions modules/fastqc/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'

params.options = [:]
options = initOptions(params.options)
Expand All @@ -24,24 +24,31 @@ process FASTQC {
output:
tuple val(meta), path("*.html"), emit: html
tuple val(meta), path("*.zip") , emit: zip
path "*.version.txt" , emit: version
path "versions.yml" , emit: version

script:
// Add soft-links to original FastQs for consistent naming in pipeline
def software = getSoftwareName(task.process)
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
if (meta.single_end) {
"""
[ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt

cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
"""
} else {
"""
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt

cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
END_VERSIONS
"""
}
}
2 changes: 1 addition & 1 deletion modules/fastqc/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ output:
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@grst"
Expand Down
54 changes: 32 additions & 22 deletions modules/multiqc/functions.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
}

//
// Extract name of module from process name using $task.process
//
def getProcessName(task_process) {
return task_process.tokenize(':')[-1]
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
Expand Down Expand Up @@ -37,32 +44,35 @@ def getPathFromList(path_list) {
// Function to save/publish module results
//
def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
}
if (ioptions.publish_by_meta) {
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
def path = key
if (args.meta.containsKey(key)) {
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
}
path = path instanceof String ? path : ''
path_list.add(path)
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
}
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
return "${getPathFromList(ext_list)}/$args.filename"
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
} else if (ioptions.publish_files == null) {
return "${getPathFromList(path_list)}/$args.filename"
}
}
10 changes: 7 additions & 3 deletions modules/multiqc/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'

params.options = [:]
options = initOptions(params.options)
Expand All @@ -24,12 +24,16 @@ process MULTIQC {
path "*multiqc_report.html", emit: report
path "*_data" , emit: data
path "*_plots" , optional:true, emit: plots
path "*.version.txt" , emit: version
path "versions.yml" , emit: version

script:
def software = getSoftwareName(task.process)
"""
multiqc -f $options.args .
multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt

cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
END_VERSIONS
"""
}
2 changes: 1 addition & 1 deletion modules/multiqc/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ output:
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
pattern: "versions.yml"
authors:
- "@abhi18av"
- "@bunop"
Expand Down
40 changes: 40 additions & 0 deletions tests/test_versions_yml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pathlib import Path
import pytest
import yaml
grst marked this conversation as resolved.
Show resolved Hide resolved
import re


def _get_workflow_names():
"""Get all names of all workflows which have a test.yml in the tests directory.

To do so, recursively finds all test.yml files and parses their content.
"""
here = Path(__file__).parent.resolve()
pytest_workflow_files = here.glob("**/test.yml")
for f in pytest_workflow_files:
test_config = yaml.safe_load(f.read_text())
for workflow in test_config:
yield workflow["name"]


@pytest.mark.workflow(*_get_workflow_names())
def test_ensure_valid_version_yml(workflow_dir):
workflow_dir = Path(workflow_dir)
software_name = workflow_dir.name.split("_")[0].lower()
versions_yml = (workflow_dir / f"output/{software_name}/versions.yml").read_text()

assert (
"END_VERSIONS" not in versions_yml
), "END_VERSIONS detected in versions.yml. END_VERSIONS being in the text is a sign of an ill-formatted HEREDOC"

# Raises an exception if yaml is not valid
versions = yaml.safe_load(versions_yml)
try:
software_versions = versions[software_name.upper()]
except KeyError:
raise AssertionError("There is no entry `<SOFTWARE>` in versions.yml. ")
assert len(software_versions), "There must be at least one version emitted."
for tool, version in software_versions.items():
assert re.match(
r"^\d+.*", str(version)
), f"Version number for {tool} must start with a number. "