Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

realignment filter in M2 wdl #4848

Merged
merged 2 commits into from
Jun 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 79 additions & 5 deletions scripts/mutect2_wdl/mutect2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
## onco_ds_tar_gz, default_config_file: Oncotator datasources and config file
## sequencing_center, sequence_source: metadata for Oncotator
## filter_oncotator_maf: Whether the MAF generated by oncotator should have the filtered variants removed. Default: true
## realignment_index_bundle: resource for FilterAlignmentArtifacts, which runs if and only if it is specified. Generated by BwaMemIndexImageCreator.
##
## Outputs :
## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
Expand Down Expand Up @@ -73,6 +74,8 @@ workflow Mutect2 {
File? gnomad_index
File? variants_for_contamination
File? variants_for_contamination_index
File? realignment_index_bundle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update the comments and any relevant READMEs?

String? realignment_extra_args
Boolean? run_orientation_bias_filter
Boolean run_ob_filter = select_first([run_orientation_bias_filter, false])
Array[String]? artifact_modes
Expand Down Expand Up @@ -304,8 +307,26 @@ workflow Mutect2 {
}
}

# Optional realignment filtering: this block is entered only when realignment_index_bundle is supplied.
if (defined(realignment_index_bundle)) {
# Take the orientation-bias-filtered VCF (and its index) when defined; otherwise fall back to the Filter output.
File realignment_filter_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File realignment_filter_input_idx = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
call FilterRealignmentArtifacts {
input:
gatk_override = gatk_override,
bam = tumor_bam,
bai = tumor_bai,
# select_first over a one-element array unwraps the optional File? into the required File the task declares
realignment_index_bundle = select_first([realignment_index_bundle]),
realignment_extra_args = realignment_extra_args,
gatk_docker = gatk_docker,
compress = compress,
output_name = filtered_name,
input_vcf = realignment_filter_input,
input_vcf_idx = realignment_filter_input_idx
}
}

if (run_oncotator_or_default) {
File oncotate_vcf_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File oncotate_vcf_input = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
call oncotate_m2 {
input:
m2_vcf = oncotate_vcf_input,
Expand All @@ -325,8 +346,8 @@ workflow Mutect2 {
}

if (run_funcotator_or_default) {
File funcotate_vcf_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File funcotate_vcf_input_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File funcotate_vcf_input = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File funcotate_vcf_input_index = select_first([FilterRealignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
call Funcotate {
input:
m2_vcf = funcotate_vcf_input,
Expand All @@ -351,8 +372,8 @@ workflow Mutect2 {
output {
File unfiltered_vcf = MergeVCFs.merged_vcf
File unfiltered_vcf_index = MergeVCFs.merged_vcf_index
File filtered_vcf = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File filtered_vcf_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File filtered_vcf = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File filtered_vcf_index = select_first([FilterRealignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File? contamination_table = CalculateContamination.contamination_table

File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
Expand Down Expand Up @@ -795,6 +816,59 @@ task FilterByOrientationBias {
}
}

# Runs GATK FilterAlignmentArtifacts on an already-filtered VCF, using reads from `bam`
# and the BWA-MEM index image given by `realignment_index_bundle`.
#
# Outputs the filtered VCF and its index, named from `output_name` and `compress`.
task FilterRealignmentArtifacts {
    # inputs
    File? gatk_override                 # optional GATK jar; the docker image's /root/gatk.jar is used when absent
    File input_vcf
    File input_vcf_idx                  # not referenced in the command; presumably declared so it is localized with input_vcf
    File bam
    File bai                            # not referenced in the command; presumably declared so it is localized with bam
    String output_name
    Boolean compress
    String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
    File realignment_index_bundle
    String? realignment_extra_args      # appended verbatim to the FilterAlignmentArtifacts command line

    # runtime
    String gatk_docker
    Int? mem
    Int? preemptible_attempts
    Int? disk_space
    Int? cpu
    Boolean use_ssd = false

    # Mem is in units of GB but our command and memory runtime values are in MB.
    # machine_mem is what the runtime requests; command_mem is the JVM heap limit (-Xmx),
    # kept 500 MB lower so non-heap JVM usage still fits inside the machine's memory.
    Int machine_mem = if defined(mem) then mem * 1000 else 9000
    Int command_mem = machine_mem - 500

    command {
        set -e

        export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

        gatk --java-options "-Xmx${command_mem}m" FilterAlignmentArtifacts \
            -V ${input_vcf} \
            -I ${bam} \
            --bwa-mem-index-image ${realignment_index_bundle} \
            ${realignment_extra_args} \
            -O ${output_vcf}
    }

    runtime {
        docker: gatk_docker
        # Request the full machine memory, not command_mem: requesting command_mem would make the
        # container limit equal to -Xmx and leave no headroom above the Java heap.
        memory: machine_mem + " MB"
        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
        preemptible: select_first([preemptible_attempts, 10])
        cpu: select_first([cpu, 1])
    }

    output {
        File filtered_vcf = "${output_vcf}"
        File filtered_vcf_index = "${output_vcf_index}"
    }
}

task oncotate_m2 {
# inputs
File m2_vcf
Expand Down
77 changes: 73 additions & 4 deletions scripts/mutect2_wdl/mutect2_nio.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
## onco_ds_tar_gz, default_config_file: Oncotator datasources and config file
## sequencing_center, sequence_source: metadata for Oncotator
## filter_oncotator_maf: Whether the MAF generated by oncotator should have the filtered variants removed. Default: true
## realignment_index_bundle: resource for FilterAlignmentArtifacts, which runs if and only if it is specified. Generated by BwaMemIndexImageCreator.
##
## Outputs :
## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
Expand Down Expand Up @@ -74,6 +75,8 @@ workflow Mutect2 {
Int scatter_count
File? gnomad
File? variants_for_contamination
File? realignment_index_bundle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add comments above?

String? realignment_extra_args
Boolean? run_orientation_bias_filter
Boolean run_ob_filter = select_first([run_orientation_bias_filter, false])
Array[String]? artifact_modes
Expand Down Expand Up @@ -290,8 +293,23 @@ workflow Mutect2 {
}
}

# Optional realignment filtering: this block is entered only when realignment_index_bundle is supplied.
if (defined(realignment_index_bundle)) {
# Take the orientation-bias-filtered VCF when defined; otherwise fall back to the Filter output.
File realignment_filter_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
call FilterRealignmentArtifacts {
input:
gatk_override = gatk_override,
bam = tumor_bam,
# select_first over a one-element array unwraps the optional File? into the required File the task declares
realignment_index_bundle = select_first([realignment_index_bundle]),
realignment_extra_args = realignment_extra_args,
gatk_docker = gatk_docker,
compress = compress,
output_name = filtered_name,
input_vcf = realignment_filter_input
}
}

if (run_oncotator_or_default) {
File oncotate_vcf_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File oncotate_vcf_input = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
call oncotate_m2 {
input:
m2_vcf = oncotate_vcf_input,
Expand All @@ -312,7 +330,7 @@ workflow Mutect2 {

if (run_funcotator_or_default) {
# Funcotator input: prefer the realignment-filtered VCF and its index, then the orientation-bias-filtered ones, then the Filter output.
File funcotate_vcf_input = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File funcotate_vcf_input_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File funcotate_vcf_input_index = select_first([FilterRealignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
call Funcotate {
input:
m2_vcf = funcotate_vcf_input,
Expand All @@ -334,8 +352,8 @@ workflow Mutect2 {
output {
File unfiltered_vcf = MergeVCFs.merged_vcf
File unfiltered_vcf_index = MergeVCFs.merged_vcf_index
File filtered_vcf = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File filtered_vcf_index = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File filtered_vcf = select_first([FilterRealignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File filtered_vcf_index = select_first([FilterRealignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
File? contamination_table = CalculateContamination.contamination_table

File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
Expand Down Expand Up @@ -765,6 +783,57 @@ task FilterByOrientationBias {
}
}

# Runs GATK FilterAlignmentArtifacts on an already-filtered VCF, using reads from `bam`
# and the BWA-MEM index image given by `realignment_index_bundle`.
#
# Outputs the filtered VCF and its index, named from `output_name` and `compress`.
task FilterRealignmentArtifacts {
    # inputs
    File? gatk_override                 # optional GATK jar; the docker image's /root/gatk.jar is used when absent
    String input_vcf                    # declared as String, so the value is passed to GATK as given (presumably a remote path; confirm)
    String bam                          # declared as String, so the value is passed to GATK as given (presumably a remote path; confirm)
    String output_name
    Boolean compress
    String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
    File realignment_index_bundle
    String? realignment_extra_args      # appended verbatim to the FilterAlignmentArtifacts command line

    # runtime
    String gatk_docker
    Int? mem
    Int? preemptible_attempts
    Int? disk_space
    Int? cpu
    Boolean use_ssd = false

    # Mem is in units of GB but our command and memory runtime values are in MB.
    # machine_mem is what the runtime requests; command_mem is the JVM heap limit (-Xmx),
    # kept 500 MB lower so non-heap JVM usage still fits inside the machine's memory.
    Int machine_mem = if defined(mem) then mem * 1000 else 9000
    Int command_mem = machine_mem - 500

    command {
        set -e

        export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

        gatk --java-options "-Xmx${command_mem}m" FilterAlignmentArtifacts \
            -V ${input_vcf} \
            -I ${bam} \
            --bwa-mem-index-image ${realignment_index_bundle} \
            ${realignment_extra_args} \
            -O ${output_vcf}
    }

    runtime {
        docker: gatk_docker
        # Request the full machine memory, not command_mem: requesting command_mem would make the
        # container limit equal to -Xmx and leave no headroom above the Java heap.
        memory: machine_mem + " MB"
        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
        preemptible: select_first([preemptible_attempts, 10])
        cpu: select_first([cpu, 1])
    }

    output {
        File filtered_vcf = "${output_vcf}"
        File filtered_vcf_index = "${output_vcf_index}"
    }
}

task oncotate_m2 {
# inputs
File m2_vcf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ public class FilterAlignmentArtifacts extends VariantWalker {
doc="Sufficient number of good read realignments to accept a variant.", optional=true)
private int sufficientGoodRealignments = DEFAULT_SUFFICIENT_GOOD_REALIGNMENTS;

public static final String DONT_SKIP_ALREADY_FILTERED_VARIANTS_LONG_NAME = "dont-skip-filtered-variants";
@Argument(fullName = DONT_SKIP_ALREADY_FILTERED_VARIANTS_LONG_NAME,
doc="Try to realign all variants, even ones that have already been filtered.", optional=true)
private boolean dontSkipFilteredVariants = false;


@ArgumentCollection
protected RealignmentArgumentCollection realignmentArgumentCollection = new RealignmentArgumentCollection();
Expand Down Expand Up @@ -143,7 +148,7 @@ public Object onTraversalSuccess() {

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
Trilean passesFilter = vc.getNAlleles() == 1 ? Trilean.TRUE : Trilean.UNKNOWN;
Trilean passesFilter = vc.getNAlleles() == 1 || (vc.isFiltered() && !dontSkipFilteredVariants) ? Trilean.TRUE : Trilean.UNKNOWN;

final MutableInt failedRealignmentCount = new MutableInt(0);
final MutableInt succeededRealignmentCount = new MutableInt(0);
Expand Down