Skip to content

Commit

Permalink
Fix position annotations to use position in original, not clipped, re…
Browse files Browse the repository at this point in the history
…ad (#4956)
  • Loading branch information
davidbenjamin authored Jul 9, 2018
1 parent d8056d9 commit 111c8ef
Show file tree
Hide file tree
Showing 17 changed files with 607 additions and 611 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.tribble.*;
import htsjdk.variant.bcf2.BCF2Codec;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.google.common.primitives.Ints;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math.util.FastMath;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
Expand All @@ -11,6 +12,7 @@
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;

/**
Expand Down Expand Up @@ -41,10 +43,10 @@ protected int aggregate(final List<Integer> values) {

@Override
protected OptionalInt getValueForRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);

final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL, true);
return offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ?
OptionalInt.empty() : OptionalInt.of(read.getBaseQuality(offset));
if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return OptionalInt.empty();
}
final OptionalDouble result = BaseQualityRankSumTest.getReadBaseQuality(read, vc.getStart());
return result.isPresent() ? OptionalInt.of((int) FastMath.round(result.getAsDouble())) : OptionalInt.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
* <h3>Caveat</h3>
* <p>The clipping rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*
* <h3>Really Big Caveat</h3>
* <p> In AssemblyRegionWalkers the annotation engine receives reads after they have been hard-clipped to fit the assembly region.
* Thus this annotation should not be used with HaplotypeCaller and Mutect2.</p>
*/
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Rank sum test for hard-clipped bases on REF versus ALT reads (ClippingRankSum)")
public final class ClippingRankSumTest extends RankSumTest implements StandardHCAnnotation {
public final class ClippingRankSumTest extends RankSumTest {

@Override
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.CLIPPING_RANK_SUM_KEY); }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import org.broadinstitute.barclay.help.DocumentedFeature;
Expand Down Expand Up @@ -62,12 +63,20 @@ public static OptionalDouble getReadPosition(final GATKRead read, final int refL
return OptionalDouble.of(INVALID_ELEMENT_FROM_READ);
}

int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
// hard clips at this point in the code are perfectly good bases that were clipped to make the read fit the assembly region
final Cigar cigar = read.getCigar();
final CigarElement firstElement = cigar.getFirstCigarElement();
final CigarElement lastElement = cigar.getLastCigarElement();
final int leadingHardClips = firstElement.getOperator() == CigarOperator.HARD_CLIP ? firstElement.getLength() : 0;
final int trailingHardClips = lastElement.getOperator() == CigarOperator.HARD_CLIP ? lastElement.getLength() : 0;

int readPos = leadingHardClips + AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
final int numOriginalBases = numAlignedBases + leadingHardClips + trailingHardClips;

//After the middle of the read, we compute the postion from the end of the read.
if (readPos > numAlignedBases / 2) {
readPos = numAlignedBases - (readPos + 1);
if (readPos > numOriginalBases / 2) {
readPos = numOriginalBases - (readPos + 1);
}
return OptionalDouble.of(readPos);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.primitives.Ints;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.MathUtils;
Expand All @@ -11,6 +14,7 @@
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;

/**
Expand Down Expand Up @@ -43,15 +47,10 @@ protected int aggregate(final List<Integer> values) {

@Override
protected OptionalInt getValueForRead(final GATKRead read, final VariantContext vc) {
Utils.nonNull(read);
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL, true);
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || AlignmentUtils.isInsideDeletion(read.getCigar(), offset)) {
if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return OptionalInt.empty();
}

int readPosition = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
final int distanceFromEnd = Math.min(readPosition, numAlignedBases - readPosition - 1);
return OptionalInt.of(distanceFromEnd);
final OptionalDouble valueAsDouble = ReadPosRankSumTest.getReadPosition(read, vc.getStart());
return valueAsDouble.isPresent() ? OptionalInt.of((int) valueAsDouble.getAsDouble()) : OptionalInt.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ protected ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
// Otherwise (else part) we need to do it again.
if (configuration.useFilteredReadMapForAnnotations || !configuration.isSampleContaminationPresent()) {
readAlleleLikelihoodsForAnnotations = readAlleleLikelihoodsForGenotyping;
readAlleleLikelihoodsForAnnotations.filterToOnlyOverlappingUnclippedReads(loc);
readAlleleLikelihoodsForAnnotations.filterToOnlyOverlappingReads(loc);
} else {
readAlleleLikelihoodsForAnnotations = readHaplotypeLikelihoods.marginalize(alleleMapper, loc);
if (emitReferenceConfidence) {
Expand Down Expand Up @@ -235,7 +235,7 @@ private static Map<String, List<GATKRead>> overlappingFilteredReads(final Map<St
continue;
}
final List<GATKRead> newList = originalList.stream()
.filter(read -> ReadLikelihoods.unclippedReadOverlapsRegion(read, loc))
.filter(read -> read.overlaps(loc))
.collect(Collectors.toCollection(() -> new ArrayList<>(originalList.size())));

if (!newList.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -680,16 +680,14 @@ private int[][] overlappingReadIndicesBySampleIndex(final Locatable overlap) {
final int sampleCount = samples.numberOfSamples();
final int[][] result = new int[sampleCount][];
final IntArrayList buffer = new IntArrayList(200);
final String contig = overlap.getContig();
final int overlapStart = overlap.getStart();
final int overlapEnd = overlap.getEnd();

for (int s = 0; s < sampleCount; s++) {
buffer.clear();
final GATKRead[] sampleReads = readsBySampleIndex[s];
final int sampleReadCount = sampleReads.length;
buffer.ensureCapacity(sampleReadCount);
for (int r = 0; r < sampleReadCount; r++) {
if (unclippedReadOverlapsRegion(sampleReads[r], contig, overlapStart, overlapEnd)) {
if (sampleReads[r].overlaps(overlap)) {
buffer.add(r);
}
}
Expand All @@ -698,26 +696,6 @@ private int[][] overlappingReadIndicesBySampleIndex(final Locatable overlap) {
return result;
}

public static boolean unclippedReadOverlapsRegion(final GATKRead read, final Locatable region) {
return unclippedReadOverlapsRegion(read, region.getContig(), region.getStart(), region.getEnd());
}

private static boolean unclippedReadOverlapsRegion(final GATKRead sampleRead, final String contig, final int start, final int end) {
final String readReference = sampleRead.getContig();
if (!Objects.equals(readReference, contig)) {
return false;
}

final int readStart = sampleRead.getUnclippedStart();
if (readStart > end) {
return false;
}

final int readEnd = sampleRead.isUnmapped() ? sampleRead.getUnclippedEnd()
: Math.max(sampleRead.getUnclippedEnd(), sampleRead.getUnclippedStart());
return readEnd >= start;
}

// Calculate the marginal likelihoods considering the old -> new allele index mapping.
private double[][][] marginalLikelihoods(final int oldAlleleCount, final int newAlleleCount, final int[] oldToNewAlleleIndexMap, final int[][] readsToKeep) {

Expand Down Expand Up @@ -1151,20 +1129,16 @@ public int sampleReadCount(final int sampleIndex) {
*
* @throws IllegalArgumentException the location cannot be {@code null} nor unmapped.
*/
public void filterToOnlyOverlappingUnclippedReads(final SimpleInterval location) {
public void filterToOnlyOverlappingReads(final SimpleInterval location) {
Utils.nonNull(location, "the location cannot be null");

final int sampleCount = samples.numberOfSamples();

final String locContig = location.getContig();
final int locStart = location.getStart();
final int locEnd = location.getEnd();

final int alleleCount = alleles.numberOfAlleles();
for (int s = 0; s < sampleCount; s++) {
final GATKRead[] sampleReads = readsBySampleIndex[s];
final List<Integer> removeIndices = new IndexRange(0, sampleReads.length)
.filter(r -> !unclippedReadOverlapsRegion(sampleReads[r], locContig, locStart, locEnd));
.filter(r -> !sampleReads[r].overlaps(location));
removeSampleReads(s, removeIndices, alleleCount);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public void contaminationDownsampling(final Map<String, Double> perSampleDownsam
}

@Override
public void filterToOnlyOverlappingUnclippedReads(final SimpleInterval location) {
public void filterToOnlyOverlappingReads(final SimpleInterval location) {
throw new UnsupportedOperationException("Cannot alter reads in UnfilledReadsLikelihoods object or cached pileups may be rendered inaccurate, please use a normal ReadsLikelihoods object");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,21 @@ public static GATKRead createReadAlignedToRef(final GATKRead originalRead,
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(),
originalRead.getBases(), swPairwiseAlignment.getAlignmentOffset(), 0, true);

read.setCigar(readToRefCigar);

// the SW Cigar does not contain the hard clips of the original read
final Cigar originalCigar = originalRead.getCigar();
final CigarElement firstElement = originalCigar.getFirstCigarElement();
final CigarElement lastElement = originalCigar.getLastCigarElement();
final List<CigarElement> readToRefCigarElementsWithHardClips = new ArrayList<>();
if (firstElement.getOperator() == CigarOperator.HARD_CLIP) {
readToRefCigarElementsWithHardClips.add(firstElement);
}
readToRefCigarElementsWithHardClips.addAll(readToRefCigar.getCigarElements());
if (lastElement.getOperator() == CigarOperator.HARD_CLIP) {
readToRefCigarElementsWithHardClips.add(lastElement);
}

read.setCigar(new Cigar(readToRefCigarElementsWithHardClips));

if ( readToRefCigar.getReadLength() != read.getLength() ) {
throw new GATKException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ public void testAnnotationGroupAdditive(){

//check that annotations from both specified groups are present
Assert.assertTrue(annots.stream().anyMatch(a -> a.getClass().getSimpleName().equals(Coverage.class.getSimpleName())));
Assert.assertTrue(annots.stream().anyMatch(a -> a.getClass().getSimpleName().equals(ClippingRankSumTest.class.getSimpleName())));
Assert.assertTrue(annots.stream().anyMatch(a -> a.getClass().getSimpleName().equals(DepthPerSampleHC.class.getSimpleName())));
}

@DataProvider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ public void testFilterReadsToOverlap(final String[] samples, final Allele[] alle
final SimpleInterval evenReadOverlap = new SimpleInterval(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(), EVEN_READ_START, EVEN_READ_START);
fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.copy();
result.filterToOnlyOverlappingUnclippedReads(evenReadOverlap);
result.filterToOnlyOverlappingReads(evenReadOverlap);
final double[][][] newLikelihoods = new double[samples.length][alleles.length][];
for (int s = 0; s < samples.length ; s++)
for (int a = 0; a < alleles.length; a++) {
Expand Down
Loading

0 comments on commit 111c8ef

Please sign in to comment.