-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added --sites-only argument to GATK engine #4764
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,6 +113,10 @@ public abstract class GATKTool extends CommandLineProgram { | |
optional = true) | ||
public boolean disableBamIndexCaching = false; | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, | ||
doc = "If true, don't emit GT fields when writing vcf file output.", optional = true) | ||
public boolean sitesOnlyMode = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Name of field should make it clear that this applies to VCFs specifically -- eg., |
||
|
||
/** | ||
* Master sequence dictionary to be used instead of all other dictionaries (if provided). | ||
*/ | ||
|
@@ -728,6 +732,10 @@ public VariantContextWriter createVCFWriter(final File outFile) { | |
} | ||
} | ||
|
||
if (sitesOnlyMode) { | ||
options.add(Options.DO_NOT_WRITE_GENOTYPES); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you check to see whether there are any other Don't worry about non-GATKTools. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have already checked. It is bypassed in a number of tests, the HaplotypeCaller, and |
||
} | ||
|
||
return GATKVariantContextUtils.createVCFWriter( | ||
outFile, | ||
sequenceDictionary, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,17 +322,20 @@ public static List<ReadFilter> makeStandardHCReadFilters() { | |
* @return a VCF or GVCF writer as appropriate, ready to use | ||
*/ | ||
public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary, | ||
final boolean createOutputVariantIndex, final boolean createOutputVariantMD5 ) { | ||
final boolean createOutputVariantIndex, final boolean sitesOnlyMode, | ||
final boolean createOutputVariantMD5 ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be slightly more logical if the ordering of the boolean args were: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
Utils.nonNull(outputVCF); | ||
Utils.nonNull(readsDictionary); | ||
|
||
List<Options> options = new ArrayList<>(2); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);} | ||
if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);} | ||
|
||
VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter( | ||
new File(outputVCF), | ||
readsDictionary, | ||
createOutputVariantMD5, | ||
createOutputVariantIndex ? | ||
new Options[]{Options.INDEX_ON_THE_FLY} : | ||
new Options[0] | ||
options.toArray(new Options[options.size()]) | ||
); | ||
|
||
if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,23 @@ | ||
package org.broadinstitute.hellbender.engine; | ||
|
||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
import org.broadinstitute.hellbender.Main; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove stray imports from this class There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doen |
||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; | ||
import org.broadinstitute.hellbender.utils.test.IntegrationTestSpec; | ||
import org.broadinstitute.hellbender.tools.examples.ExampleReadWalkerWithVariants; | ||
import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
public final class FeatureSupportIntegrationTest extends CommandLineProgramTest { | ||
private static final String FEATURE_INTEGRATION_TEST_DIRECTORY = publicTestDir + "org/broadinstitute/hellbender/engine/"; | ||
|
@@ -83,6 +92,25 @@ public void testFeaturesAsIntervalsUnrecognizedFormatFile() throws IOException { | |
testSpec.executeTest("testFeaturesAsIntervals", this); | ||
} | ||
|
||
@Test | ||
public void testSitesOnlyMode() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you move this to a new |
||
File out = createTempFile("GTStrippedOutput", "vcf"); | ||
String[] args = new String[] { | ||
"-V", FEATURE_INTEGRATION_TEST_DIRECTORY + "vcf_with_genotypes.vcf", | ||
"--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you confirm that this test fails if you remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just checked, it fails without --sites-only |
||
"-O", | ||
out.getAbsolutePath()}; | ||
new Main().instanceMain(makeCommandLineArgs(Arrays.asList(args), SelectVariants.class.getSimpleName())); | ||
|
||
// Assert that the genotype field has been stripped from the file | ||
Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); | ||
|
||
Assert.assertFalse(results.getLeft().hasGenotypingData()); | ||
for (VariantContext v: results.getRight()) { | ||
Assert.assertFalse(v.hasGenotypes()); | ||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also add a similar test to |
||
@Test | ||
// this test asserts that a helpful exception is thrown for blockZipped files lacking an index as they may not be fully supported | ||
//TODO this is a temporary fix until https://github.com/broadinstitute/gatk/issues/4224 has been resolved | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
##fileformat=VCFv4.1 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=MIN_GQ,Number=1,Type=Integer,Description="Minimum GQ observed within the GVCF block"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 | ||
20 69491 . A <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 | ||
20 69511 . A G,<NON_REF> 2253.77 . . GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 | ||
20 69512 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 | ||
20 69522 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:95:0:95:0:0,0,0 | ||
20 69549 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:156:99:56:66:0,66,990 | ||
20 69635 . C T,<NON_REF> 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 | ||
20 69762 . A <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 | ||
20 69763 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:21:7:21:0,21,253 | ||
20 69767 . C <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:12:0,12,180 | ||
20 69772 . TTATC T,<NON_REF> 60.77 . . GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:89:89,0,119,101,128,229:0,4,0,3 | ||
20 69773 . T <NON_REF> . . . GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:0:3:0:0,0,0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you rename to the more explicit
sites-only-vcf-output
?