-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
removing A,R, and G length attributes when ReblockGVCF subsets an allele #8209
Changes from all commits
cd3cb80
e018d20
f2631d9
a21fe8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -184,6 +184,8 @@ public final class ReblockGVCF extends MultiVariantWalker { | |
|
||
private CachingIndexedFastaSequenceFile referenceReader; | ||
|
||
private static final List<String> alleleBasedLengthAnnots = new ArrayList<>(); | ||
|
||
public static class AlleleLengthComparator implements Comparator<Allele> { | ||
public int compare(Allele a1, Allele a2) { | ||
return a1.getBaseString().length() - a2.getBaseString().length(); | ||
|
@@ -256,6 +258,15 @@ public void onTraversalStart() { | |
} | ||
} | ||
|
||
//Allele length and Genotype length annotations need to be subset or removed if alleles are dropped so we need to parse the header for annotation count types | ||
for(final VCFFormatHeaderLine formatHeaderLine : inputHeader.getFormatHeaderLines()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Occasionally the header is wrong, but it's usually A should be R or vice versa, not const should be A. I think this is fine since we're just using this as a list to remove, but it's something to keep in mind. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, in that case I'll add a warning for the logs that at least lists which annotations have been removed for easier debugging in the future. |
||
if(formatHeaderLine.getCountType().equals(VCFHeaderLineCount.A) || | ||
formatHeaderLine.getCountType().equals(VCFHeaderLineCount.R) || | ||
formatHeaderLine.getCountType().equals(VCFHeaderLineCount.G)) { | ||
alleleBasedLengthAnnots.add(formatHeaderLine.getID()); | ||
} | ||
} | ||
|
||
if ( dbsnp.dbsnp != null ) { | ||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); | ||
} | ||
|
@@ -599,7 +610,7 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) { | |
if(allelesNeedSubsetting && !keepAllAlts) { | ||
newAlleleSetUntrimmed.removeAll(allelesToDrop); | ||
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(variant.getGenotypes(), genotype.getPloidy(), variant.getAlleles(), | ||
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN); | ||
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, alleleBasedLengthAnnots); | ||
if (gc.get(0).isHomRef() || !gc.get(0).hasGQ() || gc.get(0).getAlleles().contains(Allele.NO_CALL)) { //could be low quality or no-call after subsetting | ||
if (dropLowQuals) { | ||
return null; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be honest, I can't follow my own logic here, but you just copied so you're not to blame.