Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Notebook modules #617

Merged
merged 48 commits into from
Oct 24, 2021
Merged
Changes from 8 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
803bdbb
Draft rmarkdown module
grst Jul 24, 2021
c32aab9
stub jupyter notebook module
grst Jul 24, 2021
1bb979e
Create yaml file with params
grst Jul 25, 2021
2578be2
Update meta.yml for rmarkdown module
grst Jul 25, 2021
45252df
Add comment on YAML
grst Jul 25, 2021
d24be9a
Update notebooks module, clean up parametrize.nf
grst Jul 26, 2021
d749782
Two separate channels for parameters and input files
grst Jul 31, 2021
7ff964e
Fix Rmd render script
grst Jul 31, 2021
2a5a147
Add tests for rmarkdown
grst Aug 2, 2021
2375450
Fix tests for rmarkdown module
grst Aug 2, 2021
ba3adb6
Update checksums
grst Aug 2, 2021
aff5e25
Fix tests for jupyter
grst Aug 2, 2021
8195a2a
Test without Grab()
grst Aug 2, 2021
bfb0776
Update software versions
grst Aug 3, 2021
c7eb419
update rmarkdown dependencies
grst Aug 3, 2021
af8d617
Draft for multiple versions
grst Aug 3, 2021
38fa369
Fix indent of script
grst Aug 3, 2021
1c065ad
Fix indent in rmarkdown script
grst Aug 3, 2021
311d8e4
Emit version.syml
grst Aug 3, 2021
5232e6c
Update modules/rmarkdown/main.nf
grst Aug 4, 2021
f48d159
Update modules/rmarkdown/meta.yml
grst Aug 4, 2021
51d6fb2
Update modules/rmarkdown/meta.yml
grst Aug 4, 2021
f71bd59
Rename rmarkdown to rmarkdownnotebook
grst Aug 4, 2021
08e0d49
Add rmarkdown mulled biocontainer
grst Aug 4, 2021
2fde477
Write sessionInfo to separate log file
grst Aug 5, 2021
7aadf4b
Update rmarkdownnotebook
grst Aug 5, 2021
fdd2783
Sessioninfo does not have a stable md5sum
grst Aug 5, 2021
fe2706b
Update jupyternotebook
grst Aug 5, 2021
2add3ea
Update meta
grst Aug 5, 2021
0b5f14a
Add jupyternotebook biocontainers
grst Aug 9, 2021
64aae51
Handle Groovy Gstrings in parameterize
grst Sep 10, 2021
c9dc356
Merge branch 'master' into notebook-modules
grst Sep 29, 2021
cba6f04
Update to versions.yml
grst Oct 12, 2021
d4e9993
Merge remote-tracking branch 'upstream/master' into notebook-modules
grst Oct 12, 2021
6f7ae10
Update functions.nf
grst Oct 12, 2021
54b4ad0
Fix versions yaml
grst Oct 12, 2021
9ba1b87
Fix EC lint
grst Oct 12, 2021
dec9981
Update modules/rmarkdownnotebook/main.nf
grst Oct 13, 2021
fdcc64c
Update modules/jupyternotebook/main.nf
grst Oct 13, 2021
560178c
Use official test data
grst Oct 13, 2021
7f99cda
Harshilify
grst Oct 13, 2021
ecf2a00
Merge branch 'master' into notebook-modules
grst Oct 13, 2021
c1b9234
Make parameters channel clearer
grst Oct 13, 2021
222d6c2
Merge branch 'master' into notebook-modules
drpatelh Oct 23, 2021
46cb8cb
Apply suggestions from code review
grst Oct 24, 2021
38ad13d
Merge branch 'master' into notebook-modules
grst Oct 24, 2021
807414a
Apply suggestions from code review
grst Oct 24, 2021
653f5cc
Update main.nf
grst Oct 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions modules/jupyternotebook/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Extract name of software tool from process name using $task.process
//
// Derive the lower-case tool name from a fully qualified process name:
// the part after the last ':' is taken, and of that the part before the first '_'.
// e.g. 'WORKFLOW:SAMTOOLS_SORT' -> 'samtools'
def getSoftwareName(task_process) {
    def scope_parts  = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def name_parts   = process_name.tokenize('_')
    def tool_name    = name_parts[0]
    return tool_name.toLowerCase()
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
// Build the module options map from the user-supplied `args` map.
// Every key falls back to an empty default when unset, except `publish_files`,
// which is passed through untouched (so it stays null when not provided).
def initOptions(Map args) {
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
// Join the entries of `path_list` into a single '/'-separated path string.
// Null, empty and whitespace-only entries are skipped; every remaining entry is
// trimmed and stripped of its leading and trailing slashes before joining.
def getPathFromList(path_list) {
    // Groovy's safe navigation does not short-circuit the rest of a call chain, so the
    // former `!item?.trim().isEmpty()` threw a NullPointerException on null entries.
    // Relying on Groovy truth (null and '' are both false) handles them uniformly.
    def paths = path_list.findAll { item -> item?.trim() } // Remove null/empty entries
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}

//
// Function to save/publish module results
//
// Compute the publish path (relative to the publishDir) for one output file.
//
// Expected keys in `args`: filename, options, publish_dir, meta, publish_by_meta.
// Returns null (i.e. nothing is published) for '*.version.txt' files, when
// `publish_files` is a map without a matching suffix, or when `publish_files`
// is neither a map nor null.
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)
    // Base directory: the user option wins over the module default
    def dirs = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one sub-directory per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (key in meta_keys) {
            if (!(args.meta && key instanceof String)) {
                continue
            }
            // Keys missing from meta are used literally as directory name
            def segment = key
            if (args.meta.containsKey(key)) {
                def value = args.meta[key]
                segment = value instanceof Boolean ? "${key}_${value}".toString() : value
            }
            // Anything that is not a plain String contributes an empty (later dropped) entry
            dirs.add(segment instanceof String ? segment : '')
        }
    }

    // No file filter configured: publish everything below the collected directories
    if (opts.publish_files == null) {
        return "${getPathFromList(dirs)}/$args.filename"
    }

    // File filter configured: the first matching suffix decides the extra sub-directory
    if (opts.publish_files instanceof Map) {
        for (ext in opts.publish_files) {
            if (args.filename.endsWith(ext.key)) {
                def ext_dirs = dirs + [ ext.value ]
                return "${getPathFromList(ext_dirs)}/$args.filename"
            }
        }
    }
    return null
}
87 changes: 87 additions & 0 deletions modules/jupyternotebook/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { dump_params_yml } from "./parametrize"

params.options = [:]
options = initOptions(params.options)
params.parametrize = true
params.implicit_params = true
params.meta_params = true

//
// Render a Jupyter/jupytext notebook to an HTML report.
//
// Inputs:
//   meta        - Groovy map with sample information; meta.id is used as the task tag
//   notebook    - notebook file; converted to ipynb with jupytext before execution
//   parameters  - notebook parameters, merged into the generated parameter set
//                 (expected to be a map, as it is added to a map)
//   input_files - additional files staged for the task
//
// Outputs:
//   report    - the rendered *.html together with meta
//   artifacts - files found in artifacts/ after execution (optional)
//   version   - *.version.txt holding the jupytext version
//
process JUPYTERNOTEBOOK {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    //NB: You likely want to override this with a container containing all required
    //dependencies for your analysis. The container at least needs to contain the
    //ipykernel, jupytext, nbconvert and papermill Python packages.
    //TODO: what container to use as default image?
    conda (params.enable_conda ? "ipykernel jupytext nbconvert papermill" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE"
    } else {
        container "quay.io/biocontainers/YOUR-TOOL-HERE"
    }

    input:
    tuple val(meta), path(notebook)
    val(parameters)
    path(input_files)

    output:
    tuple val(meta), path("*.html"), emit: report
    // Optional: a notebook is not required to write anything into artifacts/,
    // and an empty directory would otherwise fail the task on a missing output.
    path("artifacts/*"), emit: artifacts, optional: true
    path "*.version.txt" , emit: version

    script:
    def software = getSoftwareName(task.process)
    // NOTE(review): prefix is computed but not used below; the report is named after the notebook.
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"

    // Dump parameters to yaml file.
    // Using a yaml file over using the CLI params because
    //  * no issue with escaping
    //  * allows to pass nested maps instead of just single values
    def params_cmd = ""
    def render_cmd = ""
    if (params.parametrize) {
        // Declared with `def` so the map is local to this task; without it the variable
        // is global to the script and may be shared between concurrently evaluated tasks.
        def nb_params = [:]
        if (params.implicit_params) {
            nb_params["cpus"] = task.cpus
            nb_params["artifact_dir"] = "artifacts"
            nb_params["input_dir"] = "./"
        }
        if (params.meta_params) {
            nb_params["meta"] = meta
        }
        // User-supplied parameters are added last, so they override the implicit ones
        nb_params += parameters
        params_cmd = dump_params_yml(nb_params)
        render_cmd = "papermill -f .params.yml"
    } else {
        render_cmd = "papermill"
    }

    """
    # Create output directory
    mkdir artifacts

    # Set parallelism for BLAS/MKL etc. to avoid over-booking of resources
    export MKL_NUM_THREADS="${task.cpus}"
    export OPENBLAS_NUM_THREADS="${task.cpus}"
    export OMP_NUM_THREADS="${task.cpus}"
    export NUMBA_NUM_THREADS="${task.cpus}"

    # dump parameters to yaml
    ${params_cmd}

    # Convert notebook to ipynb using jupytext, execute using papermill, convert using nbconvert
    jupytext --to notebook --output - --set-kernel - ${notebook} \\
    | ${render_cmd} \\
    | jupyter nbconvert --stdin --to html --output ${notebook.baseName}.html

    # TODO how to output versions of multiple tools?
    echo \$(jupytext --version) > ${software}.version.txt
    """
}
47 changes: 47 additions & 0 deletions modules/jupyternotebook/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: jupyternotebook
## TODO nf-core: Add a description of the module and list keywords
description: write your description here
keywords:
- sort
tools:
- jupyternotebook:
## TODO nf-core: Add a description and other details for the software below
description:
homepage:
documentation:
tool_dev_url:
doi: ""
licence:

## TODO nf-core: Add a description of all of the variables used as input
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
## TODO nf-core: Delete / customise this example input
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"

## TODO nf-core: Add a description of all of the variables used as output
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
## TODO nf-core: Delete / customise this example output
- bam:
type: file
description: Sorted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"

authors:
- "@grst"
1 change: 1 addition & 0 deletions modules/jupyternotebook/parametrize.nf
68 changes: 68 additions & 0 deletions modules/rmarkdown/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Extract name of software tool from process name using $task.process
//
// Derive the lower-case tool name from a fully qualified process name:
// the part after the last ':' is taken, and of that the part before the first '_'.
// e.g. 'WORKFLOW:SAMTOOLS_SORT' -> 'samtools'
def getSoftwareName(task_process) {
    def scope_parts  = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def name_parts   = process_name.tokenize('_')
    def tool_name    = name_parts[0]
    return tool_name.toLowerCase()
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
// Build the module options map from the user-supplied `args` map.
// Every key falls back to an empty default when unset, except `publish_files`,
// which is passed through untouched (so it stays null when not provided).
def initOptions(Map args) {
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
// Join the entries of `path_list` into a single '/'-separated path string.
// Null, empty and whitespace-only entries are skipped; every remaining entry is
// trimmed and stripped of its leading and trailing slashes before joining.
def getPathFromList(path_list) {
    // Groovy's safe navigation does not short-circuit the rest of a call chain, so the
    // former `!item?.trim().isEmpty()` threw a NullPointerException on null entries.
    // Relying on Groovy truth (null and '' are both false) handles them uniformly.
    def paths = path_list.findAll { item -> item?.trim() } // Remove null/empty entries
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}

//
// Function to save/publish module results
//
// Compute the publish path (relative to the publishDir) for one output file.
//
// Expected keys in `args`: filename, options, publish_dir, meta, publish_by_meta.
// Returns null (i.e. nothing is published) for '*.version.txt' files, when
// `publish_files` is a map without a matching suffix, or when `publish_files`
// is neither a map nor null.
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)
    // Base directory: the user option wins over the module default
    def dirs = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one sub-directory per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (key in meta_keys) {
            if (!(args.meta && key instanceof String)) {
                continue
            }
            // Keys missing from meta are used literally as directory name
            def segment = key
            if (args.meta.containsKey(key)) {
                def value = args.meta[key]
                segment = value instanceof Boolean ? "${key}_${value}".toString() : value
            }
            // Anything that is not a plain String contributes an empty (later dropped) entry
            dirs.add(segment instanceof String ? segment : '')
        }
    }

    // No file filter configured: publish everything below the collected directories
    if (opts.publish_files == null) {
        return "${getPathFromList(dirs)}/$args.filename"
    }

    // File filter configured: the first matching suffix decides the extra sub-directory
    if (opts.publish_files instanceof Map) {
        for (ext in opts.publish_files) {
            if (args.filename.endsWith(ext.key)) {
                def ext_dirs = dirs + [ ext.value ]
                return "${getPathFromList(ext_dirs)}/$args.filename"
            }
        }
    }
    return null
}
91 changes: 91 additions & 0 deletions modules/rmarkdown/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
include { dump_params_yml } from "./parametrize"

params.options = [:]
options = initOptions(params.options)
params.parametrize = true
params.implicit_params = true
params.meta_params = true

// Renders an R Markdown notebook to an HTML report (see the `*.html` output below).
// This span holds the directives and the input/output declarations; the statements
// that build and run the render command follow further down in the process body.
process RMARKDOWN {
// Tag each task with the sample id taken from the meta map
tag "$meta.id"
label 'process_low'
// Publish below params.outdir; the saveAs closure delegates the relative path to saveFiles()
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

//NB: You likely want to override this with a container containing all required
//dependencies for your analysis. The container at least needs to contain the
//yaml and rmarkdown R packages.
conda (params.enable_conda ? "r-tidyverse=1.2.1" : null)
// Singularity pulls the image from the Galaxy depot unless a Docker pull is requested
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/r-tidyverse:1.2.1"
} else {
container "quay.io/biocontainers/r-tidyverse:1.2.1"
}

input:
// meta: Groovy map with sample information; notebook: the file that gets rendered
tuple val(meta), path(notebook)
// Notebook parameters; added to the parameter map built in the script section
val(parameters)
// Additional files staged for the task
path(input_files)

output:
tuple val(meta), path("*.html"), emit: report
// Optional, so a task that leaves artifacts/ empty does not fail on a missing output
path("artifacts/*"), emit: artifacts, optional: true
path "*.version.txt", emit: version

script:
// Lower-case tool name derived from the process name; used for the version file name
def software = getSoftwareName(task.process)
// NOTE(review): prefix does not appear to be referenced in the rest of the process body - confirm
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check whether the Rmd file contains a sessionInfo section. If not, add sessionInfo() at the end of the Rmd file.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I like the idea of storing sessionInfo, I'm not a big fan of modifying the notebook. But maybe we can separately dump the sessionInfo into a log file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sessionInfo might be overkill just for rendering the notebook itself.

https://github.com/nf-core/modules#general

If the software is unable to output a version number on the command-line then a variable called VERSION can be manually specified to create this file e.g. homer/annotatepeaks module.

Might be sufficient? Or some custom Rscript command to load the two packages and then run sessionInfo(),
followed by some bash cleanup?

Half completed example:

R -e "library('rmarkdown')" -e "library('yaml')" -e 'sessionInfo()' | grep -e 'R version' -e 'other' -A 1

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I added a sessioninfo log as separate output.
Into the version.yml/versions.txt, I will only include the rmarkdown and yaml packages which are directly needed for rendering the notebook.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jfy133, this was the previous discussion... I think we agreed that all versions in the versions.yml is overkill, but I added a separate log file with the session info.

// Dump parameters to yaml file.
// Using a yaml file over using the CLI params because
// * no issue with escaping
// * allows to pass nested maps instead of just single values
def params_cmd = ""
def render_cmd = ""
if (params.parametrize) {
// Declared with `def` so the map is local to this task; without it the variable
// is global to the script and may be shared between concurrently evaluated tasks.
def nb_params = [:]
if (params.implicit_params) {
nb_params["cpus"] = task.cpus
nb_params["artifact_dir"] = "artifacts"
nb_params["input_dir"] = "./"
}
if (params.meta_params) {
nb_params["meta"] = meta
}
// User-supplied parameters are added last, so they override the implicit ones
nb_params += parameters
params_cmd = dump_params_yml(nb_params)
// Two R statements: load the dumped parameters, then render with them in a fresh environment
render_cmd = (
"params = yaml::read_yaml('.params.yml')\n" +
"rmarkdown::render('${notebook}', params=params, envir=new.env())"
)
} else {
render_cmd = "rmarkdown::render('${notebook}')"
}

// The `\n` in front of the heredoc terminator below is part of the script text;
// presumably it guarantees that EOF starts at the beginning of a line regardless
// of how the template is indented - confirm before reformatting.
"""
# Create output directory
mkdir artifacts

# Set parallelism for BLAS/MKL etc. to avoid over-booking of resources
export MKL_NUM_THREADS="${task.cpus}"
export OPENBLAS_NUM_THREADS="${task.cpus}"
export OMP_NUM_THREADS="${task.cpus}"

# dump parameters to yaml
${params_cmd}

# work around https://github.com/rstudio/rmarkdown/issues/1508
mv "${notebook}" "${notebook}.orig"
cp -L "${notebook}.orig" "${notebook}"

# Render notebook
Rscript - <<EOF
${render_cmd}
\nEOF

echo \$(Rscript -e "cat(paste(packageVersion('rmarkdown'), collapse='.'))") > ${software}.version.txt
"""
}
Loading