diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java index 45d8ff26d84..a49839d414e 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java @@ -37,6 +37,7 @@ private StandardArgumentDefinitions(){} public static final String ANNOTATIONS_TO_EXCLUDE_LONG_NAME = "annotations-to-exclude"; public static final String SAMPLE_NAME_LONG_NAME = "sample-name"; public static final String PEDIGREE_FILE_LONG_NAME = "pedigree"; + public static final String SITES_ONLY_LONG_NAME = "sites-only-vcf-output"; public static final String INPUT_SHORT_NAME = "I"; public static final String OUTPUT_SHORT_NAME = "O"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 559125c58c5..c9eee1182d7 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -113,6 +113,10 @@ public abstract class GATKTool extends CommandLineProgram { optional = true) public boolean disableBamIndexCaching = false; + @Argument(fullName = StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, + doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) + public boolean outputSitesOnlyVCFs = false; + /** * Master sequence dictionary to be used instead of all other dictionaries (if provided). */ @@ -728,6 +732,10 @@ public VariantContextWriter createVCFWriter(final File outFile) { } } + if (outputSitesOnlyVCFs) { + options.add(Options.DO_NOT_WRITE_GENOTYPES); + } + return GATKVariantContextUtils.createVCFWriter( outFile, sequenceDictionary, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java index 5b2972bc050..d497f2b277d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -6,7 +6,6 @@ import java.nio.file.Path; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -16,9 +15,7 @@ import org.broadinstitute.hellbender.engine.filters.ReadFilter; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import java.io.File; import java.io.FileNotFoundException; import java.util.List; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -198,7 +195,7 @@ public void onTraversalStart() { // The HC engine will make the right kind (VCF or GVCF) of writer for us final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary(); - vcfWriter = hcEngine.makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5); + vcfWriter = hcEngine.makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs); hcEngine.writeHeader(vcfWriter, sequenceDictionary, getDefaultToolVCFHeaderLines()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index e957cc66803..20c209b3015 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -322,17 +322,20 @@ public static List makeStandardHCReadFilters() { * @return a VCF or GVCF writer as appropriate, ready to use */ public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary, - final boolean createOutputVariantIndex, final boolean createOutputVariantMD5 ) { + final boolean createOutputVariantIndex, final boolean createOutputVariantMD5, + final boolean sitesOnlyMode ) { Utils.nonNull(outputVCF); Utils.nonNull(readsDictionary); + final List options = new ArrayList<>(2); + if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);} + if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);} + VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter( new File(outputVCF), readsDictionary, createOutputVariantMD5, - createOutputVariantIndex ? - new Options[]{Options.INDEX_ON_THE_FLY} : - new Options[0] + options.toArray(new Options[options.size()]) ); if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/test/CommandLineProgramTester.java b/src/main/java/org/broadinstitute/hellbender/utils/test/CommandLineProgramTester.java index 3852ad3f97f..dbed8e13d1c 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/test/CommandLineProgramTester.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/test/CommandLineProgramTester.java @@ -78,12 +78,21 @@ default List injectDefaultVerbosity(final List args) { /** * Runs the command line implemented by this test. * - * Default behaviour uses {@link Main} with the command line arguments created by {@link #makeCommandLineArgs(List)}. + * Default behavior uses {@link Main} with the command line arguments created by {@link #makeCommandLineArgs(List)}. */ default Object runCommandLine(final List args) { return new Main().instanceMain(makeCommandLineArgs(args)); } + /** + * Lets you explicitly specify a tool to run with the provided arguments + * + * Default behavior uses {@link Main} with the command line arguments created by {@link #makeCommandLineArgs(List, String)}. + */ + default Object runCommandLine(final List args, final String toolName) { + return new Main().instanceMain(makeCommandLineArgs(args, toolName)); + } + default Object runCommandLine(final String[] args) { return runCommandLine(Arrays.asList(args)); } diff --git a/src/test/java/org/broadinstitute/hellbender/CommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/CommandLineProgramTest.java index bb6e819601a..1a4be742148 100644 --- a/src/test/java/org/broadinstitute/hellbender/CommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/CommandLineProgramTest.java @@ -32,4 +32,9 @@ public Object runCommandLine(final List args) { return new Main().instanceMain(makeCommandLineArgs(args)); } + @Override + public Object runCommandLine(final List args, final String toolName) { + return new Main().instanceMain(makeCommandLineArgs(args, toolName)); + } + } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureSupportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureSupportIntegrationTest.java index 1aa0752def2..2b4318ff9d5 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/FeatureSupportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureSupportIntegrationTest.java @@ -6,7 +6,6 @@ import org.broadinstitute.hellbender.tools.examples.ExampleReadWalkerWithVariants; import org.testng.annotations.Test; -import java.io.File; import java.io.IOException; import java.util.Arrays; diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java new file mode 100644 index 00000000000..329bd066748 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -0,0 +1,38 @@ +package org.broadinstitute.hellbender.engine; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; +import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.List; + +public class GatkToolIntegrationTest extends CommandLineProgramTest { + private static final String TEST_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; + + @Test + public void testSitesOnlyMode() { + File out = createTempFile("GTStrippedOutput", "vcf"); + String[] args = new String[] { + "-V", TEST_DIRECTORY + "vcf_with_genotypes.vcf", + "--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, + "-O", + out.getAbsolutePath()}; + runCommandLine(Arrays.asList(args), SelectVariants.class.getSimpleName()); + + // Assert that the genotype field has been stripped from the file + Pair> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); + + Assert.assertFalse(results.getLeft().hasGenotypingData()); + for (VariantContext v: results.getRight()) { + Assert.assertFalse(v.hasGenotypes()); + } + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index c44add701a9..60d0f99a70a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -4,7 +4,6 @@ import htsjdk.tribble.Tribble; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; @@ -377,6 +376,32 @@ public void testBamoutProducesReasonablySizedOutput() { } } + + @Test + public void testSitesOnlyMode() { + Utils.resetRandomGenerator(); + File out = createTempFile("GTStrippedOutput", "vcf"); + final String[] args = { + "-I", NA12878_20_21_WGS_bam, + "-R", b37_reference_20_21, + "-L", "20:10000000-10010000", + "-O", out.getAbsolutePath(), + "-pairHMM", "AVX_LOGLESS_CACHING", + "--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, + "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false" + }; + + runCommandLine(args); + + // Assert that the genotype field has been stripped from the file + Pair> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); + + Assert.assertFalse(results.getLeft().hasGenotypingData()); + for (VariantContext v: results.getRight()) { + Assert.assertFalse(v.hasGenotypes()); + } + } + @DataProvider(name="outputFileVariations") public Object[][] getOutputFileVariations() { return new Object[][]{ diff --git a/src/test/resources/org/broadinstitute/hellbender/engine/vcf_with_genotypes.vcf b/src/test/resources/org/broadinstitute/hellbender/engine/vcf_with_genotypes.vcf new file mode 100644 index 00000000000..2eb3b433d73 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/engine/vcf_with_genotypes.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.1 +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 +20 69491 . A . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 +20 69511 . A G, 2253.77 . . GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 +20 69512 . C . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 +20 69522 . C . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:95:0:95:0:0,0,0 +20 69549 . C . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:156:99:56:66:0,66,990 +20 69635 . C T, 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 +20 69762 . A . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 +20 69763 . C . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:21:7:21:0,21,253 +20 69767 . C . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:12:0,12,180 +20 69772 . TTATC T, 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 +20 69773 . T . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:0:3:0:0,0,0