From b3d641c7a8df9bf482352dc769f4e6097c19bf0e Mon Sep 17 00:00:00 2001 From: Megan Shand Date: Wed, 16 Aug 2023 11:07:25 -0400 Subject: [PATCH] Arg to move site level filters to genotype level filters in ReblockGvcfs --- .../walkers/variantutils/ReblockGVCF.java | 28 ++++++++++++++----- .../ReblockGVCFIntegrationTest.java | 26 +++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 8b965e93012..3fb50c958a3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -115,6 +115,7 @@ public final class ReblockGVCF extends MultiVariantWalker { public static final String ALLOW_MISSING_LONG_NAME = "allow-missing-hom-ref-data"; public static final String KEEP_SITE_FILTERS_LONG_NAME = "keep-site-filters"; public static final String KEEP_SITE_FILTERS_SHORT_NAME = "keep-filters"; + public static final String ADD_FILTERS_TO_GENOTYPE = "add-site-filters-to-genotype"; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="File to which variants should be written") @@ -175,6 +176,10 @@ public final class ReblockGVCF extends MultiVariantWalker { @Argument(fullName=KEEP_SITE_FILTERS_LONG_NAME, shortName = KEEP_SITE_FILTERS_SHORT_NAME, doc="Keep site level filters for variants (not ref blocks).") private boolean keepFilters = false; + @Advanced + @Argument(fullName= ADD_FILTERS_TO_GENOTYPE, doc="Add site level filters to genotype level. Site level filters removed by default, if they should be kept, use --" + KEEP_SITE_FILTERS_LONG_NAME) + private boolean addFiltersToFormatField = false; + //TODO: this will be an argument when posteriors handling is fully implemented in AlleleSubsettingUtils protected String posteriorsKey = null; @@ -273,6 +278,10 @@ public void onTraversalStart() { } } + if (addFiltersToFormatField) { + headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); + } + //Allele length and Genotype length annotations need to be subset or removed if alleles are dropped so we need to parse the header for annotation count types for(final VCFFormatHeaderLine formatHeaderLine : inputHeader.getFormatHeaderLines()) { if(formatHeaderLine.getCountType().equals(VCFHeaderLineCount.A) || @@ -677,8 +686,13 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) { final Genotype updatedAllelesGenotype = updatedAllelesVC.getGenotype(0); //remove any AD reads for the non-ref - final List genotypesArray = removeNonRefADs(updatedAllelesGenotype, updatedAllelesVC.getAlleleIndex(Allele.NON_REF_ALLELE)); - builder.genotypes(genotypesArray); + final Genotype g = removeNonRefADs(updatedAllelesGenotype, updatedAllelesVC.getAlleleIndex(Allele.NON_REF_ALLELE)); + if (addFiltersToFormatField) { + final List filters = updatedAllelesVC.getFilters().stream().toList(); + builder.genotypes(new GenotypeBuilder(g).filters(filters).make()); + } else { + builder.genotypes(g); + } composeUpdatedAnnotations(attrMap, doQualApprox, posteriorsKey, variant, annotationEngine, relevantIndices, updatedAllelesVC, annotationsToKeep); @@ -882,9 +896,9 @@ private static void addQualAnnotations(final Map destination, fi * Any AD counts for the non-ref allele get propagated to every new allele when GVCFs are merged, so zero them out * @param g a genotype that may or may not contain AD * @param nonRefInd allele index of the non-ref, -1 if missing - * @return an unmodifiable Genotype array that can be used by a GenotypeBuilder + * @return a Genotype with the non-ref AD zeroed out and new DP, or the original genotype if it doesn't have AD */ - private List removeNonRefADs(final Genotype g, final int nonRefInd) { + private Genotype removeNonRefADs(final Genotype g, final int nonRefInd) { if (g.hasAD() && nonRefInd != -1) { final int[] ad = g.getAD(); if (ad.length >= nonRefInd && ad[nonRefInd] > 0) { //only initialize a builder if we have to @@ -897,12 +911,12 @@ private List removeNonRefADs(final Genotype g, final int nonRefInd) { } else { gb.DP((int) MathUtils.sum(ad)); } - return Collections.singletonList(gb.make()); + return gb.make(); } else { - return Collections.singletonList(g); + return g; } } else { - return Collections.singletonList(g); + return g; } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java index 3c69f10373b..b0f523b6f7e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java @@ -578,6 +578,32 @@ public void testFilters() throws IOException { Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1 } + @Test + public void testMovingFilters() throws IOException { + final File input = getTestFile("dragen.g.vcf"); + final File output = createTempFile("reblockedgvcf", ".vcf"); + + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addReference(new File(hg38Reference)) + .add("V", input) + .add("L", "chr1") + .add(ReblockGVCF.ADD_FILTERS_TO_GENOTYPE, true) + .addOutput(output); + runCommandLine(args); + + final VariantContext filteredVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(3); // last site in the file + Assert.assertFalse(filteredVC.isFiltered()); + Assert.assertTrue(filteredVC.getGenotype(0).isFiltered()); + + final VariantContext unfilteredVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(1); + Assert.assertFalse(unfilteredVC.isFiltered()); + Assert.assertFalse(unfilteredVC.getGenotype(0).isFiltered()); + + final VariantContext filteredRefBlockVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(0); + Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered + Assert.assertFalse(filteredRefBlockVC.getGenotype(0).isFiltered()); // Ref block genotype is also unfiltered + } + @Test public void testRemovingFormatAnnotations() { final File input = getTestFile("dragen.g.vcf");