Skip to content

Commit

Permalink
Extract shared setup into helper methods to reduce redundancy
Browse files Browse the repository at this point in the history
  • Loading branch information
LadDeep committed Apr 7, 2023
1 parent 50e9439 commit 2816540
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 33 deletions.
5 changes: 4 additions & 1 deletion src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ public ReadEndsForMarkDuplicates buildReadEnds(final SAMFileHeader header, final
* Goes through the accumulated ReadEndsForMarkDuplicates objects and determines which of them are
* to be marked as duplicates.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){
final int entryOverhead;
if (TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
Expand All @@ -735,6 +735,9 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in
maxInMemory,
TMP_DIR);
}
}
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
sortIndicesForDuplicates(indexOpticalDuplicates);

ReadEndsForMarkDuplicates firstOfNextChunk = null;
final List<ReadEndsForMarkDuplicates> nextChunk = new ArrayList<>(200);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,9 @@
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SortingLongCollection;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.sam.markduplicates.util.RepresentativeReadIndexerCodec;
import picard.sam.util.RepresentativeReadIndexer;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

/**
Expand Down Expand Up @@ -86,32 +80,7 @@ private void validateFlowParameteres() {
* applicable for flow mode invocation.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
final int entryOverhead;
if (md.TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
// three int entries + overhead: (3 * 4) + 4 = 16 bytes
entryOverhead = 16;
} else {
entryOverhead = SortingLongCollection.SIZEOF;
}
// Keep this number from getting too large even if there is a huge heap.
int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5));
// If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections
if (indexOpticalDuplicates) {
maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead);
md.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
}
log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
md.duplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
if (md.TAG_DUPLICATE_SET_MEMBERS) {
final RepresentativeReadIndexerCodec representativeIndexCodec = new RepresentativeReadIndexerCodec();
md.representativeReadIndicesForDuplicates = SortingCollection.newInstance(RepresentativeReadIndexer.class,
representativeIndexCodec,
Comparator.comparing(read -> read.readIndexInFile),
maxInMemory,
md.TMP_DIR);
}

md.sortIndicesForDuplicates(indexOpticalDuplicates);
// this code does not support pairs at this time
if ( md.pairSort.iterator().hasNext() ) {
throw new IllegalArgumentException("Flow based code does not support paired reads");
Expand Down

0 comments on commit 2816540

Please sign in to comment.