Skip to content

Commit

Permalink
add index for phase
Browse files Browse the repository at this point in the history
  • Loading branch information
RuthEberhardt committed May 1, 2024
1 parent a1d3ba8 commit 8877bf2
Show file tree
Hide file tree
Showing 9 changed files with 298 additions and 8 deletions.
14 changes: 14 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"bcftools/index": {
"branch": "master",
"git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09",
"installed_by": [
"modules"
]
},
"bcftools/merge": {
"branch": "master",
"git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09",
"installed_by": [
"modules"
]
},
"glimpse2/ligate": {
"branch": "master",
"git_sha": "ee7fee68281944b002bd27a8ff3f19200b4d3fad",
Expand Down
8 changes: 4 additions & 4 deletions modules/local/split_vcfs/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ process SPLIT_VCFS {
label 'process_medium'

input:
tuple path(vcf), path(sample_list)
tuple val(meta), path(vcf), path(sample_list)

output:
tuple path('vcf_sample_subset_*.vcf.gz'), path('vcf_sample_subset_*.vcf.gz.csi'), emit: split_vcfs
tuple val(meta), path('vcf_sample_subset_*.vcf.gz'), path('vcf_sample_subset_*.vcf.gz.csi'), emit: split_vcfs

script:
"""
bcftools view -S $sample_list $vcf -Oz -o vcf_sample_subset_${sample_list}.vcf.gz
bcftools index vcf_sample_subset_${sample_list}.vcf.gz
bcftools view -S $sample_list $vcf -Oz -o vcf_sample_subset_${meta.id}.vcf.gz
bcftools index vcf_sample_subset_${meta.id}.vcf.gz
"""

}
7 changes: 7 additions & 0 deletions modules/nf-core/bcftools/index/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: bcftools_index
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bcftools=1.18
51 changes: 51 additions & 0 deletions modules/nf-core/bcftools/index/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Build an index for a VCF/BCF file with `bcftools index`.
//
// Input:
//   meta - Groovy map of sample information; `meta.id` is used for the task tag
//   vcf  - the variant file to index
//
// Output:
//   csi      - [meta, *.csi] index, if one was produced
//   tbi      - [meta, *.tbi] index, if one was produced
//   versions - versions.yml recording the bcftools version
//
// Extra command-line options are taken from `task.ext.args`.
process BCFTOOLS_INDEX {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0':
        'biocontainers/bcftools:1.18--h8b25389_0' }"

    input:
    tuple val(meta), path(vcf)

    output:
    // Both index outputs are optional: a run is not required to produce both.
    tuple val(meta), path("*.csi"), optional:true, emit: csi
    tuple val(meta), path("*.tbi"), optional:true, emit: tbi
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    """
    bcftools \\
        index \\
        $args \\
        --threads $task.cpus \\
        $vcf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    // Mirror the index type the real command would be asked for:
    // `-t` / `--tbi` selects a .tbi index, anything else yields .csi.
    def extension = args.contains("--tbi") || args.contains("-t") ? "tbi" :
                    "csi"
    """
    touch ${vcf}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}
48 changes: 48 additions & 0 deletions modules/nf-core/bcftools/index/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: bcftools_index
description: Index VCF files with bcftools
keywords:
- vcf
- index
- bcftools
- csi
- tbi
tools:
- bcftools:
description: BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF. All commands work transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically even when streaming from a pipe. Indexed VCF and BCF will work in all situations. Un-indexed VCF and BCF and streams will work in most, but not all situations.
homepage: https://samtools.github.io/bcftools/
documentation: https://samtools.github.io/bcftools/howtos/index.html
tool_dev_url: https://github.com/samtools/bcftools
doi: "10.1093/gigascience/giab008"
licence: ["MIT", "GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- VCF:
type: file
description: VCF file (optionally GZIPPED)
pattern: "*.{vcf,vcf.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- csi:
type: file
description: Default VCF file index file
pattern: "*.csi"
- tbi:
type: file
description: Alternative VCF file index file for larger files (activated with -t parameter)
pattern: "*.tbi"
authors:
- "@jfy133"
maintainers:
- "@jfy133"
7 changes: 7 additions & 0 deletions modules/nf-core/bcftools/merge/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: bcftools_merge
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::bcftools=1.18
64 changes: 64 additions & 0 deletions modules/nf-core/bcftools/merge/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Merge several VCF/BCF files into one with `bcftools merge`.
//
// Input:
//   meta, vcfs, tbis - sample map, the files to merge, and their index files
//                      (the indexes are staged next to the VCFs but not named
//                      on the command line)
//   meta2, fasta     - reference map and FASTA (staged only)
//   meta3, fai       - reference map and FASTA index (staged only)
//   bed              - optional regions file; passed as --regions-file when set
//
// Output:
//   merged_variants - [meta, merged file] with extension bcf, bcf.gz, vcf or vcf.gz
//   versions        - versions.yml recording the bcftools version
//
// The output file name is `<prefix>.<extension>`, where prefix defaults to
// meta.id and extension is derived from the output-type flag in `task.ext.args`.
process BCFTOOLS_MERGE {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0':
        'biocontainers/bcftools:1.18--h8b25389_0' }"

    input:
    tuple val(meta), path(vcfs), path(tbis)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    path(bed)

    output:
    tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants
    path "versions.yml"                       , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Only restrict to regions when a BED file was actually supplied.
    def regions = ""
    if (bed) {
        regions = "--regions-file $bed"
    }

    // Pick the file extension matching the requested output type.
    // Checks run in the order b, u, z; `v` and "no flag" both give plain vcf.
    def extension = "vcf"
    if (args.contains("--output-type b") || args.contains("-Ob")) {
        extension = "bcf.gz"
    } else if (args.contains("--output-type u") || args.contains("-Ou")) {
        extension = "bcf"
    } else if (args.contains("--output-type z") || args.contains("-Oz")) {
        extension = "vcf.gz"
    }

    """
    bcftools merge \\
        $args \\
        $regions \\
        --threads $task.cpus \\
        --output ${prefix}.${extension} \\
        $vcfs

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Same extension selection as the real script, so the stub output
    // matches the declared output glob for the configured output type.
    def extension = "vcf"
    if (args.contains("--output-type b") || args.contains("-Ob")) {
        extension = "bcf.gz"
    } else if (args.contains("--output-type u") || args.contains("-Ou")) {
        extension = "bcf"
    } else if (args.contains("--output-type z") || args.contains("-Oz")) {
        extension = "vcf.gz"
    }
    """
    touch ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}
88 changes: 88 additions & 0 deletions modules/nf-core/bcftools/merge/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
name: bcftools_merge
description: Merge VCF files
keywords:
- variant calling
- merge
- VCF
tools:
- merge:
description: |
Merge VCF files.
homepage: http://samtools.github.io/bcftools/bcftools.html
documentation: http://www.htslib.org/doc/bcftools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- vcfs:
type: file
description: |
List containing 2 or more vcf files
e.g. [ 'file1.vcf', 'file2.vcf' ]
- tbis:
type: file
description: |
List containing the tbi index files corresponding to the vcfs input files
e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ]
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: "(Optional) The fasta reference file (only necessary for the `--gvcf FILE` parameter)"
pattern: "*.{fasta,fa}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fai:
type: file
description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)"
pattern: "*.fai"
- bed:
type: file
description: "(Optional) The bed regions to merge on"
pattern: "*.bed"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- vcf_gz:
type: file
description: VCF merged output file (bgzipped) => when `--output-type z` is used
pattern: "*.vcf.gz"
- vcf:
type: file
description: VCF merged output file => when `--output-type v` is used
pattern: "*.vcf"
- bcf_gz:
type: file
description: BCF merged output file (bgzipped) => when `--output-type b` is used
pattern: "*.bcf.gz"
- bcf:
type: file
description: BCF merged output file => when `--output-type u` is used
pattern: "*.bcf"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"
- "@nvnieuwk"
- "@ramprasadn"
maintainers:
- "@joseespinosa"
- "@drpatelh"
- "@nvnieuwk"
- "@ramprasadn"
19 changes: 15 additions & 4 deletions workflows/run_glimpse.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
include { SPLIT_SAMPLES } from '../modules/local/split_samples/main'
include { SPLIT_VCFS } from '../modules/local/split_vcfs/main'
include { GLIMPSE2_PHASE } from '../modules/nf-core/glimpse2/phase/main'
include { BCFTOOLS_INDEX as INDEX_PHASE } from '../modules/nf-core/bcftools/index/main.nf'
include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../modules/nf-core/bcftools/index/main.nf'

workflow RUN_GLIMPSE {

Expand All @@ -9,23 +11,32 @@ workflow RUN_GLIMPSE {
vcf = channel.fromPath(params.vcf_in)
vcf_samples = vcf.combine(SPLIT_SAMPLES.out.sample_lists.flatten())

SPLIT_VCFS(vcf_samples)
// // SPLIT_VCFS.out.split_vcfs.view()
vcf_samples_input = vcf_samples.map{
vcf, sample_list ->
[[id: vcf.getSimpleName() + "-" + sample_list.getName(), batch: sample_list.getName()], vcf, sample_list]
}

SPLIT_VCFS(vcf_samples_input)
SPLIT_VCFS.out.split_vcfs.view()

ref = channel.fromPath("${params.refdir}*.bin")

phase_meta = [id: 'phase']

phase_input = SPLIT_VCFS.out.split_vcfs.combine(ref).map{
vcf, index , ref_bin ->
[phase_meta, vcf, index, [], [], [], ref_bin, [], []]
meta, vcf, index , ref_bin ->
[meta, vcf, index, [], [], [], ref_bin, [], []]
}
phase_input2 = ['', params.fasta, params.fai]

GLIMPSE2_PHASE(phase_input, phase_input2)

GLIMPSE2_PHASE.out.versions.view()

INDEX_PHASE ( GLIMPSE2_PHASE.out.phased_variants )

// attempt for splitting samples in phase
///
// phase_input = vcf_samples.combine(ref).map {
// vcf, samples, ref_bin ->
// [phase_meta, vcf, [], samples, [], [], ref_bin, [], []]
Expand Down

0 comments on commit 8877bf2

Please sign in to comment.