Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Modified HaplotypeBasedVariantRecaller to support non-flow reads #8896

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ public void traverse() {
}

// get reads overlapping haplotypes
final Map<SamReader, Collection<FlowBasedRead>> readsByReader = readsReader.getReads(haplotypeSpan, vcLoc);
final Map<SamReader, Collection<GATKRead>> readsByReader = readsReader.getReads(haplotypeSpan, vcLoc);
final List<VariantContext> variants = new LinkedList<>(Arrays.asList(vc));
if ( logger.isDebugEnabled() ) {
int readCount = 0;
for ( Collection<FlowBasedRead> reads : readsByReader.values() )
for ( Collection<GATKRead> reads : readsByReader.values() )
readCount += reads.size();
logger.debug(String.format("vcLoc %s, haplotypeSpan: %s, %d haplotypes, %d reads",
vcLoc.toString(), haplotypeSpan.toString(), processedHaplotypes.size(), readCount, variants.size()));
Expand All @@ -150,16 +150,16 @@ public void traverse() {
final List<Map<Integer, AlleleLikelihoods<GATKRead, Allele>>> genotypeLikelihoodsList = new LinkedList<>();
final List<AssemblyResultSet> assemblyResultList = new LinkedList<>();
final List<SAMFileHeader> readsHeaderList = new LinkedList<>();
for ( Map.Entry<SamReader, Collection<FlowBasedRead>> entry : readsByReader.entrySet() ) {
for ( Map.Entry<SamReader, Collection<GATKRead>> entry : readsByReader.entrySet() ) {
final AssemblyResultSet assemblyResult = new AssemblyResultSet();
processedHaplotypes.forEach(haplotype -> assemblyResult.add(haplotype));

final Map<String, List<GATKRead>> perSampleReadList = new LinkedHashMap<>();
final SamReader samReader = entry.getKey();
final Collection<FlowBasedRead> reads = entry.getValue();
final Collection<GATKRead> reads = entry.getValue();

List<GATKRead> gtakReads = new LinkedList<>();
reads.forEach(flowBasedRead -> gtakReads.add(flowBasedRead));
reads.forEach(read -> gtakReads.add(read));
perSampleReadList.put(sampleNames[0], gtakReads);
AssemblyRegion regionForGenotyping = new AssemblyRegion(haplotypeSpan, 0, samReader.getFileHeader());
assemblyResult.setPaddedReferenceLoc(haplotypeSpan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ public class TrimmedReadsReader {

private final List<SamReader> samReaders = new LinkedList<>();
private CountingReadFilter readFilter;
private final Map<String, Integer> readGroupMaxClass = new LinkedHashMap<>();
private final Map<String, String> readGroupFlowOrder = new LinkedHashMap<>();
private final FlowBasedArgumentCollection fbArgs = new FlowBasedArgumentCollection();

public TrimmedReadsReader(final List<Path> readsFiles, final Path referencePath, final int cloudPrefetchBuffer) {
Expand All @@ -44,11 +42,12 @@ public SAMSequenceDictionary getSamSequenceDictionary(final SamReader samReader)
return ((samReader != null) ? samReader : samReaders.get(0)).getFileHeader().getSequenceDictionary();
}

public Map<SamReader, Collection<FlowBasedRead>> getReads(final Locatable span, final Locatable vcLoc) {

final Map<SamReader, Collection<FlowBasedRead>> readsByReader = new LinkedHashMap<>();
public Map<SamReader, Collection<GATKRead>> getReads(final Locatable span, final Locatable vcLoc) {

final Map<SamReader, Collection<GATKRead>> readsByReader = new LinkedHashMap<>();
for ( SamReader samReader : samReaders ) {
final List<FlowBasedRead> reads = new LinkedList<>();
final List<GATKRead> reads = new LinkedList<>();
final SAMRecordIterator iter = samReader.query(span.getContig(), span.getStart(), span.getEnd(), false);
while (iter.hasNext()) {

Expand All @@ -72,7 +71,10 @@ public Map<SamReader, Collection<FlowBasedRead>> getReads(final Locatable span,
gatkRead = ReadClipper.hardClipToRegion(gatkRead, span.getStart(), span.getEnd());
if (gatkRead.isUnmapped() || gatkRead.getCigar().isEmpty())
continue;

if (!FlowBasedReadUtils.isFlowPlatform(samReader.getFileHeader(), gatkRead)){
reads.add(gatkRead);
continue;
}
// convert to a flow based read
FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(samReader.getFileHeader(), gatkRead);
final FlowBasedRead fbr = new FlowBasedRead(gatkRead, rgInfo.flowOrder, rgInfo.maxClass, fbArgs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand Down Expand Up @@ -68,10 +69,10 @@ public void testBasic(final BamSource bamSources[], final Locatable span, final
Assert.assertNotNull(reader.getHeader(null));

// reads
Map<SamReader, Collection<FlowBasedRead>> reads = reader.getReads(span, vcLoc);
Map<SamReader, Collection<GATKRead>> reads = reader.getReads(span, vcLoc);
Assert.assertEquals(reads.size(), bamSources.length);
int bamSourceIndex = 0;
for ( Map.Entry<SamReader, Collection<FlowBasedRead>> entry : reads.entrySet() ) {
for ( Map.Entry<SamReader, Collection<GATKRead>> entry : reads.entrySet() ) {

final BamSource bamSource = bamSources[bamSourceIndex++];

Expand All @@ -83,8 +84,8 @@ public void testBasic(final BamSource bamSources[], final Locatable span, final
Assert.assertEquals(entry.getValue().size(), bamSource.readCount);

// verify first and last
FlowBasedRead firstRead = entry.getValue().iterator().next();
FlowBasedRead lastRead = entry.getValue().stream().reduce((prev, next) -> next).orElse(null);
FlowBasedRead firstRead = (FlowBasedRead) entry.getValue().iterator().next();
FlowBasedRead lastRead = (FlowBasedRead) entry.getValue().stream().reduce((prev, next) -> next).orElse(null);
Assert.assertEquals(firstRead.getName(), bamSource.firstReadName);
Assert.assertEquals(lastRead.getName(), bamSource.lastReadName);
}
Expand Down
Loading