Skip to content

Commit

Permalink
Fixed a missing special case in MarkDuplicates ReadsKey code to bette…
Browse files Browse the repository at this point in the history
…r match current picard results (#4899)

* Added tests and fixes so that readsKey objects for a pair of reads which happen to be unclipped to the same location in the genome which was not being handled properly.
  • Loading branch information
jamesemery authored Jun 18, 2018
1 parent 41788f5 commit c97c7de
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,39 @@ public Pair(final GATKRead read1, final GATKRead read2, final SAMFileHeader head
final int read1UnclippedStart = ReadUtils.getStrandedUnclippedStart(read1);
final int read2UnclippedStart = ReadUtils.getStrandedUnclippedStart(read2);

if( read1UnclippedStart < read2UnclippedStart ){
int read1ReferenceIndex = ReadUtils.getReferenceIndex(read1,header);
int read2ReferenceIndex = ReadUtils.getReferenceIndex(read2,header);

if( read1ReferenceIndex != read2ReferenceIndex ? read1ReferenceIndex < read2ReferenceIndex : read1UnclippedStart <= read2UnclippedStart ){
first = read1;
second = read2;
} else {
first = read2;
second = read1;
}
// Keep track of the orientation of read1 and read2 as it is important for optical duplicate marking
wasFlipped = second.isFirstOfPair();

firstStartPosition = first.getAssignedStart();

// if the two read ends are in the same position, pointing in opposite directions,
// the orientation is undefined and the procedure above
// will depend on the order of the reads in the file.
// To avoid this, and match picard's behavior in this case, we ensure the orientation will be FR:
if (read1ReferenceIndex == read2ReferenceIndex &&
read1UnclippedStart == read2UnclippedStart &&
first.isReverseStrand() && !second.isReverseStrand()) {
// setting to FR for consistencies sake. (which involves flipping) if both reads had the same unclipped start
GATKRead tmp = first;
first = second;
second = tmp;
}

this.key = ReadsKey.getKeyForPair(header, first, second, headerLibraryMap);

isRead1ReverseStrand = first.isReverseStrand();
isRead2ReverseStrand = second.isReverseStrand();

// Keep track of the orientation of read1 and read2 as it is important for optical duplicate marking
wasFlipped = second.isFirstOfPair();

this.key = ReadsKey.getKeyForPair(header, first, second, headerLibraryMap);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,29 @@ public void testOpticalDuplicatesAndPCRDuplicatesOrientationDifference() {
tester.runTest();
}

@Test
// To match picard, when two reads are grouped into a duplicate group and have the same unclipped start position we will unify their orientations
// to FR if they point in opposite directions. This test asserts that two pairs of reads that ultimately all map to 93470499-93470499 with clipping
// and orientations will ultimately be marked and mapped together even though they have opposite orientations (one is FR while the other is RF).
public void testReadSameStartPositionOrientationOverride() {
final AbstractMarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(0);
tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21526", 19, 19, 93470380, 93470499, false, false, false, false, "31S111M9S", "64M87S", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499
tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21527", 19, 19, 93470499, 93470380, false, false, true, true, "64M87S", "31S111M9S", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499
tester.runTest();
}

@Test
// This asserts that we are flipping reads in the Pair object based on both start position and contig index, this does not
// make a difference unless the start position is the same across two contigs, so we assert it is handled properly
public void testReadsHaveSameStartPositionButDifferentChromosomeNonEquivalence() {
final AbstractMarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(0);
tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21526", 19, 20, 93470380, 93470499, false, false, false, false, "31S111M9S", "64M87S", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499
tester.addMatePair("HJYFJCCXX160204:8:1124:31659:21527", 20, 19, 93470499, 93470380, false, false, true, true, "64M87S", "31S111M9S", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // after clipping these both start on 93470499
tester.runTest();
}

@Test
public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicatesWithinPixelDistance() {
final AbstractMarkDuplicatesTester tester = getTester();
Expand Down

0 comments on commit c97c7de

Please sign in to comment.