From b413a8eb189afd313d33780867c5ba649165cf88 Mon Sep 17 00:00:00 2001
From: Ruth Eberhardt
Date: Mon, 29 Apr 2024 14:00:05 +0100
Subject: [PATCH] add phase

---
Review notes (free text between the three-dash line and the first diff;
it is not part of the commit message).

What this patch does:
  * bsub_nextflow.sh: new LSF submission wrapper. Exports proxy and
    Nextflow environment variables, then runs main.nf with
    -profile sanger, -with-trace and -resume.
  * conf/base.config: new file with default per-process cpus/memory/time,
    retry policy, and withLabel overrides; every resource value is passed
    through check_max().
  * modules/local/split_vcfs/main.nf: SPLIT_VCFS now outputs a tuple of the
    subset VCF and its .csi index, emitted as `split_vcfs`.
  * nextflow.config: includes conf/base.config, adds refdir and
    custom_config_* params, loads the nf-core institutional profiles and
    defines check_max().
  * workflows/run_glimpse.nf: pairs each SPLIT_VCFS output with every
    *.bin file under params.refdir (combine) and feeds the resulting
    tuples to GLIMPSE2_PHASE.

Changed during review (line counts are unchanged, so hunk headers and the
diffstat below still hold):
  * bsub_nextflow.sh: `${pwd}/main.nf` -> `${PWD}/main.nf`. Nothing in the
    script sets a lowercase `pwd` variable, so the path expanded to
    `/main.nf`.
  * nextflow.config: the includeConfig call and its warning now use
    `${params.custom_config_base}` instead of repeating the URL, so the
    custom_config_version / custom_config_base params declared in this
    same patch actually control which file is loaded.

NOTE(review):
  * check_max() reads params.max_memory, params.max_time and
    params.max_cpus; none of them is defined in this patch. Presumably the
    `sanger` profile supplies them - confirm, otherwise every call falls
    into the catch branch and prints the "### ERROR ###" message.
  * NXF_OPTS asks for an 8G JVM heap while the job requests `-M 8000`;
    confirm the LSF memory unit and that there is headroom.
  * Indentation and blank context lines were re-laid out from a
    whitespace-collapsed copy; run `git apply --check` against the target
    tree before applying. The blob ids on the `index` lines describe the
    pre-review content.

 bsub_nextflow.sh                 | 22 +++++++++
 conf/base.config                 | 50 +++++++++++++++++++
 modules/local/split_vcfs/main.nf |  2 +-
 nextflow.config                  | 85 +++++++++++++++++++++++++++++++-
 workflows/run_glimpse.nf         | 13 +++++
 5 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 bsub_nextflow.sh
 create mode 100644 conf/base.config

diff --git a/bsub_nextflow.sh b/bsub_nextflow.sh
new file mode 100644
index 0000000..78d8dfe
--- /dev/null
+++ b/bsub_nextflow.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# to run: bsub < $PWD/bsub_nextflow.sh
+
+#BSUB -o /path/to/a/log/dir/%J.o
+#BSUB -e /path/to/a/log/dir/%J.e
+#BSUB -M 8000
+#BSUB -q oversubscribed
+#BSUB -n 2
+
+export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128'
+export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128'
+export NXF_ANSI_LOG=false
+export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000"
+export NXF_VER=22.04.0-5697
+
+
+nextflow run \
+${PWD}/main.nf \
+-profile sanger \
+-with-trace \
+-resume
diff --git a/conf/base.config b/conf/base.config
new file mode 100644
index 0000000..1d87035
--- /dev/null
+++ b/conf/base.config
@@ -0,0 +1,50 @@
+process {
+
+    // TODO nf-core: Check the defaults for all processes
+    cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
+    memory = { check_max( 2.GB * task.attempt, 'memory' ) }
+    time   = { check_max( 1.h  * task.attempt, 'time'   ) }
+
+    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+    maxRetries    = 1
+    maxErrors     = '-1'
+
+    // Process-specific resource requirements
+    // NOTE - Please try and re-use the labels below as much as possible.
+    //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
+    //        If possible, it would be nice to keep the same label naming convention when
+    //        adding in your local modules too.
+    // TODO nf-core: Customise requirements for specific processes.
+    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+
+    withLabel:process_low {
+        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 1.GB  * task.attempt, 'memory'  ) }
+        time   = { check_max( 1.h   * task.attempt, 'time'    ) }
+    }
+    withLabel:process_medium {
+        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 4.GB  * task.attempt, 'memory'  ) }
+        time   = { check_max( 2.h   * task.attempt, 'time'    ) }
+    }
+    withLabel:process_high {
+        cpus   = { check_max( 8     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 10.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 12.h  * task.attempt, 'time'    ) }
+    }
+    withLabel:process_long {
+        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+    }
+    withLabel:process_high_memory {
+        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+    }
+
+    withLabel:error_ignore {
+        errorStrategy = 'ignore'
+    }
+    withLabel:error_retry {
+        errorStrategy = 'retry'
+        maxRetries    = 2
+    }
+
+}
\ No newline at end of file
diff --git a/modules/local/split_vcfs/main.nf b/modules/local/split_vcfs/main.nf
index b62c0ff..d9eb738 100644
--- a/modules/local/split_vcfs/main.nf
+++ b/modules/local/split_vcfs/main.nf
@@ -4,7 +4,7 @@ process SPLIT_VCFS {
     tuple path(vcf), path(sample_list)
 
     output:
-    path('vcf_sample_subset.vcf.gz')
+    tuple path('vcf_sample_subset.vcf.gz'), path('vcf_sample_subset.vcf.gz.csi'), emit: split_vcfs
 
     script:
     """
diff --git a/nextflow.config b/nextflow.config
index 4b89a33..03954e7 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,8 +1,91 @@
+includeConfig 'conf/base.config'
+
 params {
     // number of samples to be processed in each batch. Recommended 100-200
     batch_size = 10
 
     vcf_in = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/test_vcfs/test_for_glimpse.vcf.gz"
+    // refdir = "/lustre/scratch125/humgen/resources/GLIMPSE/1000g_chunked_for_GLIMPSE/defaults_snp_biallelic/split/"
+    refdir = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/ref_mini/"
     workdir = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/work"
+
+    custom_config_version = 'master'
+    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
 
-}
\ No newline at end of file
+}
+
+// Load nf-core custom profiles from different Institutions
+try {
+    includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
+}
+
+
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
+                return params.max_memory as nextflow.util.MemoryUnit
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
+                return params.max_time as nextflow.util.Duration
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}
+
+
+//process{
+    // executor = 'lsf'
+    // queue = { task.time < 20.m ? 'small' : task.time < 12.h ? 'normal' : task.time < 48.h ? 'long' : task.time < 168.h ? 'week' : 'basement' }
+    // queue = 'basement'
+
+    // withLabel:process_small{
+    //     cpus = 6
+    //     queue = 'small'
+    //     time = {0.1h * task.attempt}
+    //     errorStrategy = 'retry'
+    //     memory = 50.MB
+    // }
+
+    // withLabel:process_medium{
+    //     cpus = 6
+    //     queue = 'normal'
+    //     time = {2h * task.attempt}
+    //     errorStrategy = 'retry'
+    //     memory = 200.MB
+    // }
+//}
+
+//executor{
+    // name = 'lsf'
+    // perJobMemLimit = true
+    // poolSize = 4
+    // submitRateLimit = '5 sec'
+    // killBatchSize = 50
+//}
+
+//singularity {
+//  enabled     = true
+//  cacheDir    = '/nfs/hgi/singularityContainers/'
+    // runOptions = '--bind /lustre'
+//}
\ No newline at end of file
diff --git a/workflows/run_glimpse.nf b/workflows/run_glimpse.nf
index ebe66cb..0e0a9d2 100755
--- a/workflows/run_glimpse.nf
+++ b/workflows/run_glimpse.nf
@@ -1,5 +1,6 @@
 include { SPLIT_SAMPLES } from '../modules/local/split_samples/main'
 include { SPLIT_VCFS } from '../modules/local/split_vcfs/main'
+include { GLIMPSE2_PHASE } from '../modules/nf-core/glimpse2/phase/main'
 
 workflow RUN_GLIMPSE {
 
@@ -10,4 +11,16 @@ workflow RUN_GLIMPSE {
 
     SPLIT_VCFS(vcf_samples)
 
+    ref = channel.fromPath("${params.refdir}*.bin")
+
+    phase_input = SPLIT_VCFS.out.split_vcfs.combine(ref).map{
+        vcf, index , ref_bin ->
+        [[], vcf, index, [], [], [], ref_bin, [], []]
+    }
+    phase_input2 = channel.empty()
+
+    GLIMPSE2_PHASE(phase_input, phase_input2)
+
+    GLIMPSE2_PHASE.out.versions.view()
+
 }
\ No newline at end of file