Skip to content

Commit

Permalink
Merge pull request #622 from broadinstitute/nh_update_md_and_nm
Browse files Browse the repository at this point in the history
Update the NM and MD tags when clipping overlapping inserts in
  • Loading branch information
nh13 authored Aug 17, 2016
2 parents d233040 + e478eb5 commit 357a388
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 16 deletions.
16 changes: 16 additions & 0 deletions src/main/java/picard/sam/AbstractAlignmentMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,20 @@ private void transferAlignmentInfoToPairedRead(final SAMRecord firstUnaligned, f
}
}

/**
* Updates the MD and NM tags if present.
* @param rec
*/
private void calculateMdAndNmTags(final SAMRecord rec) {
// update NM and MD
final boolean calculateNm = rec.getIntegerAttribute(ReservedTagConstants.NM) != null;
final boolean calculateMd = rec.getStringAttribute(SAMTag.MD.name()) != null;
if (calculateNm || calculateMd) {
final byte[] referenceBases = this.refSeq.get(this.refSeq.getSequenceDictionary().getSequenceIndex(rec.getReferenceName())).getBases();
SequenceUtil.calculateMdAndNmTags(rec, referenceBases, calculateNm, calculateMd);
}
}

/**
* Checks to see whether the ends of the reads overlap and soft clips reads
* them if necessary.
Expand All @@ -605,11 +619,13 @@ protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord re
if (posDiff > 0) {
CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(),
pos.getReadLength() - posDiff + 1));
calculateMdAndNmTags(pos);
}

if (negDiff > 0) {
CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(),
neg.getReadLength() - negDiff + 1));
calculateMdAndNmTags(neg);
}

}
Expand Down
11 changes: 9 additions & 2 deletions src/test/java/picard/sam/MergeBamAlignmentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1177,19 +1177,26 @@ public void testShortFragmentClipping() throws Exception {
final Map<String, SAMRecord> firstReadEncountered = new HashMap<String, SAMRecord>();

for (final SAMRecord rec : result) {
final SAMRecord otherEnd = firstReadEncountered.get(rec.getReadName());
final String[] readNameFields = rec.getReadName().split(":");
final SAMRecord otherEnd = firstReadEncountered.get(rec.getReadName());
if (otherEnd == null) {
firstReadEncountered.put(rec.getReadName(), rec);
} else {
final int fragmentStart = Math.min(rec.getAlignmentStart(), otherEnd.getAlignmentStart());
final int fragmentEnd = Math.max(rec.getAlignmentEnd(), otherEnd.getAlignmentEnd());
final String[] readNameFields = rec.getReadName().split(":");
// Read name of each pair includes the expected fragment start and fragment end positions.
final int expectedFragmentStart = Integer.parseInt(readNameFields[1]);
final int expectedFragmentEnd = Integer.parseInt(readNameFields[2]);
Assert.assertEquals(fragmentStart, expectedFragmentStart, rec.getReadName());
Assert.assertEquals(fragmentEnd, expectedFragmentEnd, rec.getReadName());
}
// check NM and MD. We have a mismatching base in the overlap.
if (readNameFields.length > 3) {
final String md = readNameFields[3];
final Integer nm = Integer.parseInt(readNameFields[4]);
Assert.assertEquals(rec.getStringAttribute(SAMTag.MD.name()), md);
Assert.assertEquals(rec.getIntegerAttribute(SAMTag.NM.name()), nm);
}
}
result.close();
}
Expand Down
20 changes: 10 additions & 10 deletions testdata/picard/sam/MergeBamAlignment/cliptest.aligned.sam
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
@HD VN:1.0 SO:queryname
@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734
FF:80:175 65 chr1 100 20 76M chr1 80 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9
FF:80:175 129 chr1 80 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD
FR_clip:100:155 97 chr1 100 20 76M chr1 80 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9
FR_clip:100:155 145 chr1 80 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD
FR_noclip:100:575 97 chr1 100 20 76M chr1 500 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9
FR_noclip:100:575 145 chr1 500 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD
RF:100:575 81 chr1 100 20 76M chr1 500 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9
RF:100:575 161 chr1 500 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD
RR:80:175 113 chr1 100 20 76M chr1 80 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9
RR:80:175 177 chr1 80 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD
FF:80:175 65 chr1 100 20 76M chr1 80 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 MD:Z:76 NM:i:0
FF:80:175 129 chr1 80 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD MD:Z:76 NM:i:0
FR_clip:100:155:56:0 97 chr1 100 20 76M chr1 80 0 ATTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAAGGGCAAAAAAAAAACAATAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 MD:Z:76 NM:i:0
FR_clip:100:155:56:0 145 chr1 80 20 76M chr1 100 0 TTAGAGTACGTTAACACTCCATTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAA GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD MD:Z:76 NM:i:0
FR_noclip:100:575:76:1 97 chr1 100 20 76M chr1 500 0 ATTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAAGGGCAAAAAAAAAACAATAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 MD:Z:76 NM:i:0
FR_noclip:100:575:76:1 145 chr1 500 20 76M chr1 100 0 TTTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAAGGGCAAAAAAAAAACAATAC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD MD:Z:76 NM:i:0
RF:100:575 81 chr1 100 20 76M chr1 500 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 MD:Z:76 NM:i:0
RF:100:575 161 chr1 500 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD MD:Z:76 NM:i:0
RR:80:175 113 chr1 100 20 76M chr1 80 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 MD:Z:76 NM:i:0
RR:80:175 177 chr1 80 20 76M chr1 100 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD MD:Z:76 NM:i:0
8 changes: 4 additions & 4 deletions testdata/picard/sam/MergeBamAlignment/cliptest.unmapped.sam
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
@RG ID:0 SM:Hi,Mom! PL:ILLUMINA
FF:80:175 77 * 0 0 * * 0 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
FF:80:175 141 * 0 0 * * 0 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
FR_clip:100:155 77 * 0 0 * * 0 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
FR_clip:100:155 141 * 0 0 * * 0 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
FR_noclip:100:575 77 * 0 0 * * 0 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
FR_noclip:100:575 141 * 0 0 * * 0 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
FR_clip:100:155:56:0 77 * 0 0 * * 0 0 ATTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAAGGGCAAAAAAAAAACAATAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
FR_clip:100:155:56:0 141 * 0 0 * * 0 0 TTAAAGGTTTGTTAATATTTGCATCTGTACGATCGTAAGAGGGCTTCAGCATGAATGGAGTGTTAACGTACTCTAA GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
FR_noclip:100:575:76:1 77 * 0 0 * * 0 0 ATTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACCTTTAAGGGCAAAAAAAAAACAATAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
FR_noclip:100:575:76:1 141 * 0 0 * * 0 0 GTATTGTTTTTTTTTTGCCCTTAAAGGTTTGTTAATATTTGCATCTGTACGATCGTAAGAGGGCTTCAGCATGAAA GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
RF:100:575 77 * 0 0 * * 0 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
RF:100:575 141 * 0 0 * * 0 0 TCGACTCTAGAGGATCCCACGAGTTTCACTGTTGTCACATATGCTGGAGTGCAGTGGTGCAATCTTGGCTTACTGC GGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGFGGFGGDGEEE-:CECEECFEDE?FBCFD=@CBBADCF=CC:CCD RG:Z:0
RR:80:175 77 * 0 0 * * 0 0 GTACCCGGGGATCCCACTCTCTCCTTGCCCTTCATGATCTTGGCACTTTCGGAGTAATGGTCATAACATCAGTAAA ECEEEEEDDCCCDDDCDDDDEBDCCCCDBBD@DBCBCCCC:ACAA?CBCCCABCBBBBBBB?BBB?<?A?<7<<=9 RG:Z:0
Expand Down

0 comments on commit 357a388

Please sign in to comment.