From c97c7de23e138f084b131479736ed76d9824f028 Mon Sep 17 00:00:00 2001 From: jamesemery Date: Mon, 18 Jun 2018 12:19:29 -0400 Subject: [PATCH] Fixed a missing special case in MarkDuplicates ReadsKey code to better match current picard results (#4899) * Added tests and fixes so that readsKey objects for a pair of reads which happen to be unclipped to the same location in the genome which was not being handled properly. --- .../markduplicates/sparkrecords/Pair.java | 26 ++++++++++++++++--- ...tMarkDuplicatesCommandLineProgramTest.java | 23 ++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/Pair.java b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/Pair.java index 6d1dd80fd0c..0b8b3ec4109 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/Pair.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/Pair.java @@ -55,21 +55,39 @@ public Pair(final GATKRead read1, final GATKRead read2, final SAMFileHeader head final int read1UnclippedStart = ReadUtils.getStrandedUnclippedStart(read1); final int read2UnclippedStart = ReadUtils.getStrandedUnclippedStart(read2); - if( read1UnclippedStart < read2UnclippedStart ){ + int read1ReferenceIndex = ReadUtils.getReferenceIndex(read1,header); + int read2ReferenceIndex = ReadUtils.getReferenceIndex(read2,header); + + if( read1ReferenceIndex != read2ReferenceIndex ? read1ReferenceIndex < read2ReferenceIndex : read1UnclippedStart <= read2UnclippedStart ){ first = read1; second = read2; } else { first = read2; second = read1; } - // Keep track of the orientation of read1 and read2 as it is important for optical duplicate marking - wasFlipped = second.isFirstOfPair(); - firstStartPosition = first.getAssignedStart(); + // if the two read ends are in the same position, pointing in opposite directions, + // the orientation is undefined and the procedure above + // will depend on the order of the reads in the file. + // To avoid this, and match picard's behavior in this case, we ensure the orientation will be FR: + if (read1ReferenceIndex == read2ReferenceIndex && + read1UnclippedStart == read2UnclippedStart && + first.isReverseStrand() && !second.isReverseStrand()) { + // setting to FR for consistencies sake. (which involves flipping) if both reads had the same unclipped start + GATKRead tmp = first; + first = second; + second = tmp; + } + + this.key = ReadsKey.getKeyForPair(header, first, second, headerLibraryMap); + isRead1ReverseStrand = first.isReverseStrand(); isRead2ReverseStrand = second.isReverseStrand(); + // Keep track of the orientation of read1 and read2 as it is important for optical duplicate marking + wasFlipped = second.isFirstOfPair(); + this.key = ReadsKey.getKeyForPair(header, first, second, headerLibraryMap); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index a014389b3f9..4992f2de74d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -210,6 +210,29 @@ public void testOpticalDuplicatesAndPCRDuplicatesOrientationDifference() { tester.runTest(); } + @Test + // To match picard, when two reads are grouped into a duplicate group and have the same unclipped start position we will unify their orientations + // to FR if they point in opposite directions. This test asserts that two pairs of reads that ultimately all map to 93470499-93470499 with clipping + // and orientations will ultimately be marked and mapped together even though they have opposite orientations (one is FR while the other is RF). + public void testReadSameStartPositionOrientationOverride() { + final AbstractMarkDuplicatesTester tester = getTester(); + tester.setExpectedOpticalDuplicate(0); + tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21526", 19, 19, 93470380, 93470499, false, false, false, false, "31S111M9S", "64M87S", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499 + tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21527", 19, 19, 93470499, 93470380, false, false, true, true, "64M87S", "31S111M9S", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499 + tester.runTest(); + } + + @Test + // This asserts that we are flipping reads in the Pair object based on both start position and contig index, this does not + // make a difference unless the start position is the same across two contigs, so we assert it is handled properly + public void testReadsHaveSameStartPositionButDifferentChromosomeNonEquivalence() { + final AbstractMarkDuplicatesTester tester = getTester(); + tester.setExpectedOpticalDuplicate(0); + tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21526", 19, 20, 93470380, 93470499, false, false, false, false, "31S111M9S", "64M87S", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499 + tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21527", 20, 19, 93470499, 93470380, false, false, true, true, "64M87S", "31S111M9S", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499 + tester.runTest(); + } + @Test public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicatesWithinPixelDistance() { final AbstractMarkDuplicatesTester tester = getTester();