-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added --sites-only argument to GATK engine #4764
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,6 +113,10 @@ public abstract class GATKTool extends CommandLineProgram { | |
optional = true) | ||
public boolean disableBamIndexCaching = false; | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, | ||
doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) | ||
public boolean outputSitesOnlyVCFs = false; | ||
|
||
/** | ||
* Master sequence dictionary to be used instead of all other dictionaries (if provided). | ||
*/ | ||
|
@@ -728,6 +732,10 @@ public VariantContextWriter createVCFWriter(final File outFile) { | |
} | ||
} | ||
|
||
if (outputSitesOnlyVCFs) { | ||
options.add(Options.DO_NOT_WRITE_GENOTYPES); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you check to see whether there are any other Don't worry about non-GATKTools. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have already checked. It is bypassed in a number of tests, the HaplotypeCaller, and |
||
} | ||
|
||
return GATKVariantContextUtils.createVCFWriter( | ||
outFile, | ||
sequenceDictionary, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,17 +322,20 @@ public static List<ReadFilter> makeStandardHCReadFilters() { | |
* @return a VCF or GVCF writer as appropriate, ready to use | ||
*/ | ||
public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary, | ||
final boolean createOutputVariantIndex, final boolean createOutputVariantMD5 ) { | ||
final boolean createOutputVariantIndex, final boolean sitesOnlyMode, | ||
final boolean createOutputVariantMD5 ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be slightly more logical if the ordering of the boolean args were: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
Utils.nonNull(outputVCF); | ||
Utils.nonNull(readsDictionary); | ||
|
||
final List<Options> options = new ArrayList<>(2); | ||
if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);} | ||
if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);} | ||
|
||
VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter( | ||
new File(outputVCF), | ||
readsDictionary, | ||
createOutputVariantMD5, | ||
createOutputVariantIndex ? | ||
new Options[]{Options.INDEX_ON_THE_FLY} : | ||
new Options[0] | ||
options.toArray(new Options[options.size()]) | ||
); | ||
|
||
if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.broadinstitute.hellbender; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This class should live in the same package as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; | ||
import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.File; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
public class GatkToolIntegrationTest extends CommandLineProgramTest { | ||
private static final String TEST_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; | ||
|
||
@Test | ||
public void testSitesOnlyMode() { | ||
File out = createTempFile("GTStrippedOutput", "vcf"); | ||
String[] args = new String[] { | ||
"-V", TEST_DIRECTORY + "vcf_with_genotypes.vcf", | ||
"--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, | ||
"-O", | ||
out.getAbsolutePath()}; | ||
new Main().instanceMain(makeCommandLineArgs(Arrays.asList(args), SelectVariants.class.getSimpleName())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a new overload of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
// Assert that the genotype field has been stripped from the file | ||
Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); | ||
|
||
Assert.assertFalse(results.getLeft().hasGenotypingData()); | ||
for (VariantContext v: results.getRight()) { | ||
Assert.assertFalse(v.hasGenotypes()); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,23 @@ | ||
package org.broadinstitute.hellbender.engine; | ||
|
||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
import org.broadinstitute.hellbender.Main; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove stray imports from this class There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doen |
||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; | ||
import org.broadinstitute.hellbender.utils.test.IntegrationTestSpec; | ||
import org.broadinstitute.hellbender.tools.examples.ExampleReadWalkerWithVariants; | ||
import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
public final class FeatureSupportIntegrationTest extends CommandLineProgramTest { | ||
private static final String FEATURE_INTEGRATION_TEST_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,11 +12,13 @@ | |
import org.apache.commons.lang3.tuple.Pair; | ||
import org.broadinstitute.barclay.argparser.CommandLineException; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
import org.broadinstitute.hellbender.Main; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.engine.FeatureDataSource; | ||
import org.broadinstitute.hellbender.engine.ReadsDataSource; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils; | ||
import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.io.IOUtils; | ||
|
@@ -71,6 +73,30 @@ public void testVCFModeIsConsistentWithPastResults(final String inputFileName, f | |
IntegrationTestSpec.assertEqualTextFiles(output, expected); | ||
} | ||
|
||
@Test | ||
public void testSitesOnlyMode() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move this test after the standard There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
File out = createTempFile("GTStrippedOutput", "vcf"); | ||
final String[] args = { | ||
"-I", NA12878_20_21_WGS_bam, | ||
"-R", b37_reference_20_21, | ||
"-L", "20:10000000-10100000", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like an unnecessarily large interval for this test. How about 10k bases instead of 100k? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
"-O", out.getAbsolutePath(), | ||
"-pairHMM", "AVX_LOGLESS_CACHING", | ||
"--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, | ||
"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false" | ||
}; | ||
|
||
runCommandLine(args); | ||
|
||
// Assert that the genotype field has been stripped from the file | ||
Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); | ||
|
||
Assert.assertFalse(results.getLeft().hasGenotypingData()); | ||
for (VariantContext v: results.getRight()) { | ||
Assert.assertFalse(v.hasGenotypes()); | ||
} | ||
} | ||
|
||
/* | ||
* Test that in VCF mode we're consistent with past GATK4 results | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
##fileformat=VCFv4.1 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=MIN_GQ,Number=1,Type=Integer,Description="Minimum GQ observed within the GVCF block"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 | ||
20 69491 . A <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 | ||
20 69511 . A G,<NON_REF> 2253.77 . . GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 | ||
20 69512 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 | ||
20 69522 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:95:0:95:0:0,0,0 | ||
20 69549 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:156:99:56:66:0,66,990 | ||
20 69635 . C T,<NON_REF> 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 | ||
20 69762 . A <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 | ||
20 69763 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:21:7:21:0,21,253 | ||
20 69767 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:12:0,12,180 | ||
20 69772 . TTATC T,<NON_REF> 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 | ||
20 69773 . T <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:0:3:0:0,0,0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you rename to the more explicit
sites-only-vcf-output
?