Skip to content

Commit

Permalink
Arg to move site level filters to genotype level filters in ReblockGv…
Browse files Browse the repository at this point in the history
…cfs (#8484)
  • Loading branch information
meganshand authored Aug 25, 2023
1 parent 98b3b4f commit de371aa
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ public final class ReblockGVCF extends MultiVariantWalker {
public static final String ALLOW_MISSING_LONG_NAME = "allow-missing-hom-ref-data";
public static final String KEEP_SITE_FILTERS_LONG_NAME = "keep-site-filters";
public static final String KEEP_SITE_FILTERS_SHORT_NAME = "keep-filters";
public static final String ADD_FILTERS_TO_GENOTYPE = "add-site-filters-to-genotype";

@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
doc="File to which variants should be written")
Expand Down Expand Up @@ -175,6 +176,10 @@ public final class ReblockGVCF extends MultiVariantWalker {
@Argument(fullName=KEEP_SITE_FILTERS_LONG_NAME, shortName = KEEP_SITE_FILTERS_SHORT_NAME, doc="Keep site level filters for variants (not ref blocks).")
private boolean keepFilters = false;

@Advanced
@Argument(fullName= ADD_FILTERS_TO_GENOTYPE, doc="Add site level filters to genotype level. Site level filters removed by default, if they should be kept, use --" + KEEP_SITE_FILTERS_LONG_NAME)
private boolean addFiltersToFormatField = false;

//TODO: this will be an argument when posteriors handling is fully implemented in AlleleSubsettingUtils
protected String posteriorsKey = null;

Expand Down Expand Up @@ -273,6 +278,10 @@ public void onTraversalStart() {
}
}

if (addFiltersToFormatField) {
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
}

//Allele length and Genotype length annotations need to be subset or removed if alleles are dropped so we need to parse the header for annotation count types
for(final VCFFormatHeaderLine formatHeaderLine : inputHeader.getFormatHeaderLines()) {
if(formatHeaderLine.getCountType().equals(VCFHeaderLineCount.A) ||
Expand Down Expand Up @@ -677,8 +686,13 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) {
final Genotype updatedAllelesGenotype = updatedAllelesVC.getGenotype(0);

//remove any AD reads for the non-ref
final List<Genotype> genotypesArray = removeNonRefADs(updatedAllelesGenotype, updatedAllelesVC.getAlleleIndex(Allele.NON_REF_ALLELE));
builder.genotypes(genotypesArray);
final Genotype g = removeNonRefADs(updatedAllelesGenotype, updatedAllelesVC.getAlleleIndex(Allele.NON_REF_ALLELE));
if (addFiltersToFormatField) {
final List<String> filters = updatedAllelesVC.getFilters().stream().toList();
builder.genotypes(new GenotypeBuilder(g).filters(filters).make());
} else {
builder.genotypes(g);
}

composeUpdatedAnnotations(attrMap, doQualApprox, posteriorsKey, variant, annotationEngine, relevantIndices, updatedAllelesVC, annotationsToKeep);

Expand Down Expand Up @@ -882,9 +896,9 @@ private static void addQualAnnotations(final Map<String, Object> destination, fi
* Any AD counts for the non-ref allele get propagated to every new allele when GVCFs are merged, so zero them out
* @param g a genotype that may or may not contain AD
* @param nonRefInd allele index of the non-ref, -1 if missing
* @return an unmodifiable Genotype array that can be used by a GenotypeBuilder
* @return a Genotype with the non-ref AD zeroed out and new DP, or the original genotype if it doesn't have AD
*/
private List<Genotype> removeNonRefADs(final Genotype g, final int nonRefInd) {
private Genotype removeNonRefADs(final Genotype g, final int nonRefInd) {
if (g.hasAD() && nonRefInd != -1) {
final int[] ad = g.getAD();
if (ad.length >= nonRefInd && ad[nonRefInd] > 0) { //only initialize a builder if we have to
Expand All @@ -897,12 +911,12 @@ private List<Genotype> removeNonRefADs(final Genotype g, final int nonRefInd) {
} else {
gb.DP((int) MathUtils.sum(ad));
}
return Collections.singletonList(gb.make());
return gb.make();
} else {
return Collections.singletonList(g);
return g;
}
} else {
return Collections.singletonList(g);
return g;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,32 @@ public void testFilters() throws IOException {
Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1
}

@Test
public void testMovingFilters() throws IOException {
final File input = getTestFile("dragen.g.vcf");
final File output = createTempFile("reblockedgvcf", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(hg38Reference))
.add("V", input)
.add("L", "chr1")
.add(ReblockGVCF.ADD_FILTERS_TO_GENOTYPE, true)
.addOutput(output);
runCommandLine(args);

final VariantContext filteredVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(3); // last site in the file
Assert.assertFalse(filteredVC.isFiltered());
Assert.assertTrue(filteredVC.getGenotype(0).isFiltered());

final VariantContext unfilteredVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(1);
Assert.assertFalse(unfilteredVC.isFiltered());
Assert.assertFalse(unfilteredVC.getGenotype(0).isFiltered());

final VariantContext filteredRefBlockVC = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight().get(0);
Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered
Assert.assertFalse(filteredRefBlockVC.getGenotype(0).isFiltered()); // Ref block genotype is also unfiltered
}

@Test
public void testRemovingFormatAnnotations() {
final File input = getTestFile("dragen.g.vcf");
Expand Down

0 comments on commit de371aa

Please sign in to comment.