From b413a8eb189afd313d33780867c5ba649165cf88 Mon Sep 17 00:00:00 2001
From: Ruth Eberhardt <re3@sanger.ac.uk>
Date: Mon, 29 Apr 2024 14:00:05 +0100
Subject: [PATCH] add phase

---
 bsub_nextflow.sh                 | 22 +++++++++
 conf/base.config                 | 50 +++++++++++++++++++
 modules/local/split_vcfs/main.nf |  2 +-
 nextflow.config                  | 85 +++++++++++++++++++++++++++++++-
 workflows/run_glimpse.nf         | 13 +++++
 5 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 bsub_nextflow.sh
 create mode 100644 conf/base.config

diff --git a/bsub_nextflow.sh b/bsub_nextflow.sh
new file mode 100644
index 0000000..78d8dfe
--- /dev/null
+++ b/bsub_nextflow.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Runs the Nextflow pipeline (main.nf) as an LSF job, using the #BSUB options below.
+# to run, from the directory that contains main.nf: bsub < $PWD/bsub_nextflow.sh
+
+#BSUB -o /path/to/a/log/dir/%J.o
+#BSUB -e /path/to/a/log/dir/%J.e
+#BSUB -M 8000
+#BSUB -q oversubscribed
+#BSUB -n 2
+
+export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128'
+export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128'
+export NXF_ANSI_LOG=false
+export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000"
+export NXF_VER=22.04.0-5697
+
+# Use upper-case ${PWD} (the shell's working directory); lower-case ${pwd} is normally unset and expands to "/main.nf".
+nextflow run \
+"${PWD}/main.nf" \
+-profile sanger \
+-with-trace \
+-resume
diff --git a/conf/base.config b/conf/base.config
new file mode 100644
index 0000000..1d87035
--- /dev/null
+++ b/conf/base.config
@@ -0,0 +1,50 @@
+process {
+
+    // Defaults for every process; each request scales with the attempt number and is capped by check_max.
+    cpus   = { check_max( 1 * task.attempt, 'cpus' ) }
+    memory = { check_max( 2.GB * task.attempt, 'memory' ) }
+    time   = { check_max( 1.h * task.attempt, 'time' ) }
+
+    errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139] ? 'retry' : 'finish' }
+    maxRetries    = 1
+    maxErrors     = '-1'
+
+    // Per-label resource overrides.
+    // Prefer these label names over inventing new ones: they follow the naming
+    // convention used by the DSL2 modules hosted on nf-core/modules, so local
+    // modules and nf-core modules can share the same settings.
+    // Selectors are kept in their original order on purpose.
+    // Selector syntax is described at:
+    // https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+
+    withLabel: 'process_low' {
+        cpus   = { check_max( 2 * task.attempt, 'cpus' ) }
+        memory = { check_max( 1.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 1.h * task.attempt, 'time' ) }
+    }
+    withLabel: 'process_medium' {
+        cpus   = { check_max( 4 * task.attempt, 'cpus' ) }
+        memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 2.h * task.attempt, 'time' ) }
+    }
+    withLabel: 'process_high' {
+        cpus   = { check_max( 8 * task.attempt, 'cpus' ) }
+        memory = { check_max( 10.GB * task.attempt, 'memory' ) }
+        time   = { check_max( 12.h * task.attempt, 'time' ) }
+    }
+    withLabel: 'process_long' {
+        time = { check_max( 20.h * task.attempt, 'time' ) }
+    }
+    withLabel: 'process_high_memory' {
+        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+    }
+
+    withLabel: 'error_ignore' {
+        errorStrategy = 'ignore'
+    }
+    withLabel: 'error_retry' {
+        errorStrategy = 'retry'
+        maxRetries    = 2
+    }
+
+}
\ No newline at end of file
diff --git a/modules/local/split_vcfs/main.nf b/modules/local/split_vcfs/main.nf
index b62c0ff..d9eb738 100644
--- a/modules/local/split_vcfs/main.nf
+++ b/modules/local/split_vcfs/main.nf
@@ -4,7 +4,7 @@ process SPLIT_VCFS {
     tuple path(vcf), path(sample_list)
 
     output:
-    path('vcf_sample_subset.vcf.gz')
+    tuple path('vcf_sample_subset.vcf.gz'), path('vcf_sample_subset.vcf.gz.csi'), emit: split_vcfs
 
     script:
     """
diff --git a/nextflow.config b/nextflow.config
index 4b89a33..03954e7 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,8 +1,91 @@
+includeConfig 'conf/base.config'
+
 params {
     
     // number of samples to be processed in each batch. Recommended 100-200
     batch_size = 10 
     vcf_in = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/test_vcfs/test_for_glimpse.vcf.gz"
+    // refdir must keep its trailing '/' (the workflow appends "*.bin" directly). Alternative panel, currently disabled: "/lustre/scratch125/humgen/resources/GLIMPSE/1000g_chunked_for_GLIMPSE/defaults_snp_biallelic/split/"
+    refdir = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/ref_mini/"
     workdir = "/lustre/scratch126/humgen/teams/hgi/users/re3/blended_genomes_exomes/glimpse_pipe_test/work"
+    // Version (branch/tag) of the nf-core/configs repository, and the raw-content base URL built from it.
+    custom_config_version      = 'master'
+    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
     
-}
\ No newline at end of file
+}
+
+// Load nf-core institutional profiles from params.custom_config_base; on failure only a warning is printed.
+try {
+    includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
+}
+
+
+def check_max(obj, type) {
+    // Caps `obj` at params.max_memory / max_time / max_cpus according to `type`; any failure returns `obj` unchanged.
+    switch (type) {
+        case 'memory':
+            try {
+                def ceiling = params.max_memory as nextflow.util.MemoryUnit
+                return obj.compareTo(ceiling) == 1 ? ceiling : obj
+            } catch (all) {
+                println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+                return obj
+            }
+        case 'time':
+            try {
+                def ceiling = params.max_time as nextflow.util.Duration
+                return obj.compareTo(ceiling) == 1 ? ceiling : obj
+            } catch (all) {
+                println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
+                return obj
+            }
+        case 'cpus':
+            try {
+                return Math.min( obj, params.max_cpus as int )
+            } catch (all) {
+                println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+                return obj
+            }
+        default:
+            return null // any other `type` yields no value
+    }
+}
+
+
+//process{
+ //   executor = 'lsf'
+    // queue = { task.time < 20.m ? 'small' : task.time < 12.h ? 'normal' : task.time < 48.h ? 'long' : task.time < 168.h ? 'week' : 'basement' }
+ //   queue = 'basement'
+
+ //   withLabel:process_small{
+ //       cpus = 6
+  //      queue = 'small'
+ //       time = {0.1h * task.attempt}
+ //       errorStrategy = 'retry'
+ //       memory = 50.MB 
+  //  }
+
+  //  withLabel:process_medium{
+  //      cpus = 6
+  //      queue = 'normal'
+  //      time = {2h * task.attempt}
+  //      errorStrategy = 'retry'
+  //      memory = 200.MB 
+  //  }
+//}
+
+//executor{
+ //   name = 'lsf'
+ //   perJobMemLimit = true
+ //   poolSize = 4
+ //   submitRateLimit = '5 sec'
+ //   killBatchSize = 50
+//}
+
+//singularity {
+//  enabled = true
+//  cacheDir = '/nfs/hgi/singularityContainers/'
+ // runOptions = '--bind /lustre'
+//}
\ No newline at end of file
diff --git a/workflows/run_glimpse.nf b/workflows/run_glimpse.nf
index ebe66cb..0e0a9d2 100755
--- a/workflows/run_glimpse.nf
+++ b/workflows/run_glimpse.nf
@@ -1,5 +1,6 @@
 include { SPLIT_SAMPLES } from '../modules/local/split_samples/main'
 include { SPLIT_VCFS } from '../modules/local/split_vcfs/main'
+include { GLIMPSE2_PHASE } from '../modules/nf-core/glimpse2/phase/main'
 
 workflow RUN_GLIMPSE {
 
@@ -10,4 +11,16 @@ workflow RUN_GLIMPSE {
 
     SPLIT_VCFS(vcf_samples)
 
+    // One item per reference file; the glob is plain concatenation, so params.refdir must end with '/'.
+    ref = channel.fromPath("${params.refdir}*.bin")
+    // Pair every (vcf, index) with every reference file; [] pads unused slots (layout presumably per nf-core glimpse2/phase - confirm).
+    phase_input = SPLIT_VCFS.out.split_vcfs.combine(ref).map { vcf, index, ref_bin ->
+        [[], vcf, index, [], [], [], ref_bin, [], []]
+    }
+    // Must be a value channel: an empty queue channel would stop GLIMPSE2_PHASE from ever running. Placeholder = no fasta (confirm arity).
+    phase_input2 = channel.value([[], [], []])
+
+    GLIMPSE2_PHASE(phase_input, phase_input2)
+    GLIMPSE2_PHASE.out.versions.view()
+
 }
\ No newline at end of file