diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java index 179fa42b74..bcde79a5ff 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java @@ -709,7 +709,7 @@ public ReadEndsForMarkDuplicates buildReadEnds(final SAMFileHeader header, final * Goes through the accumulated ReadEndsForMarkDuplicates objects and determines which of them are * to be marked as duplicates. */ - public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) { + protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){ final int entryOverhead; if (TAG_DUPLICATE_SET_MEMBERS) { // Memory requirements for RepresentativeReadIndexer: @@ -735,6 +735,9 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in maxInMemory, TMP_DIR); } + } + public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) { + sortIndicesForDuplicates(indexOpticalDuplicates); ReadEndsForMarkDuplicates firstOfNextChunk = null; final List nextChunk = new ArrayList<>(200); diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java index 7a06380456..2a1e408691 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java @@ -29,15 +29,9 @@ import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.SortingCollection; -import htsjdk.samtools.util.SortingLongCollection; import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates; -import picard.sam.markduplicates.util.RepresentativeReadIndexerCodec; -import picard.sam.util.RepresentativeReadIndexer; -import java.io.File; import java.util.ArrayList; -import java.util.Comparator; import java.util.List; /** @@ -86,32 +80,7 @@ private void validateFlowParameteres() { * applicable for flow mode invocation. */ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) { - final int entryOverhead; - if (md.TAG_DUPLICATE_SET_MEMBERS) { - // Memory requirements for RepresentativeReadIndexer: - // three int entries + overhead: (3 * 4) + 4 = 16 bytes - entryOverhead = 16; - } else { - entryOverhead = SortingLongCollection.SIZEOF; - } - // Keep this number from getting too large even if there is a huge heap. - int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5)); - // If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections - if (indexOpticalDuplicates) { - maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead); - md.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()])); - } - log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk."); - md.duplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()])); - if (md.TAG_DUPLICATE_SET_MEMBERS) { - final RepresentativeReadIndexerCodec representativeIndexCodec = new RepresentativeReadIndexerCodec(); - md.representativeReadIndicesForDuplicates = SortingCollection.newInstance(RepresentativeReadIndexer.class, - representativeIndexCodec, - Comparator.comparing(read -> read.readIndexInFile), - maxInMemory, - md.TMP_DIR); - } - + md.sortIndicesForDuplicates(indexOpticalDuplicates); // this code does support pairs at this time if ( md.pairSort.iterator().hasNext() ) { throw new IllegalArgumentException("Flow based code does not support paired reads");