Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GetPileupSummaries streams output rather than storing it in memory #7664

Merged
merged 3 commits into from
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.tsv.TableUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -137,11 +140,11 @@ public class GetPileupSummaries extends LocusWalker {
@Argument(fullName = MIN_MAPPING_QUALITY_LONG_NAME, shortName = MIN_MAPPING_QUALITY_SHORT_NAME, doc = "Minimum read mapping quality", optional = true)
private int minMappingQuality = DEFAULT_MINIMUM_MAPPING_QUALITY;

private final List<PileupSummary> pileupSummaries = new ArrayList<>();

private boolean sawVariantsWithoutAlleleFrequency = false;
private boolean sawVariantsWithAlleleFrequency = false;

// Table writer for the pileup summary output: created in onTraversalStart, receives one record per
// qualifying site in apply, and is closed in closeTool (where it may still be null).
PileupSummary.PileupSummaryTableWriter writer;

@Override
public boolean requiresReads() {
return true;
Expand Down Expand Up @@ -185,6 +188,15 @@ public void onTraversalStart() {
if (!alleleFrequencyInHeader) {
throw new UserException.BadInput("Population vcf does not have an allele frequency (AF) info field in its header.");
}

try {
writer = new PileupSummary.PileupSummaryTableWriter(IOUtils.fileToPath(outputTable));
final String sampleName = ReadUtils.getSamplesFromHeader(getHeaderForReads()).stream().findFirst().get();
writer.writeMetadata(TableUtils.SAMPLE_METADATA_TAG, sampleName);
} catch (IOException ex) {
throw new UserException.CouldNotCreateOutputFile(outputTable, ex);
}

}

@Override
Expand All @@ -198,7 +210,11 @@ public void apply(AlignmentContext alignmentContext, ReferenceContext referenceC
if ( vc.isBiallelic() && vc.isSNP() && alleleFrequencyInRange(vc) ) {
final ReadPileup pileup = alignmentContext.getBasePileup()
.makeFilteredPileup(pe -> pe.getRead().getMappingQuality() >= minMappingQuality);
pileupSummaries.add(new PileupSummary(vc, pileup));
try {
writer.writeRecord(new PileupSummary(vc, pileup));
} catch (final IOException ex) {
throw new UserException(String.format("Encountered an IO exception while writing to %s", outputTable));
}
}
}

Expand All @@ -207,11 +223,21 @@ public Object onTraversalSuccess() {
if (sawVariantsWithoutAlleleFrequency && !sawVariantsWithAlleleFrequency) {
throw new UserException.BadInput("No variants in population vcf had an allele frequency (AF) field.");
}
final String sampleName = ReadUtils.getSamplesFromHeader(getHeaderForReads()).stream().findFirst().get();
PileupSummary.writeToFile(sampleName, pileupSummaries, outputTable);

return "SUCCESS";
}

@Override
public void closeTool() {
try {
davidbenjamin marked this conversation as resolved.
Show resolved Hide resolved
if (writer != null) {
writer.close();
}
} catch (IOException ex) {
throw new UserException(String.format("Encountered an IO exception while closing %s", outputTable));
}
}

private boolean alleleFrequencyInRange(final VariantContext vc) {
if (!vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
if (!sawVariantsWithoutAlleleFrequency) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ public int compare(PileupSummary ps1, PileupSummary ps2) {
}

//-------- The following methods are boilerplate for reading and writing pileup summary tables
private static class PileupSummaryTableWriter extends TableWriter<PileupSummary> {
private PileupSummaryTableWriter(final Path output) throws IOException {
public static class PileupSummaryTableWriter extends TableWriter<PileupSummary> {
public PileupSummaryTableWriter(final Path output) throws IOException {
super(output, PileupSummaryTableColumn.COLUMNS);
}

Expand Down