Skip to content

Commit

Permalink
Extract shared setup into helper methods to reduce redundancy
Browse files Browse the repository at this point in the history
  • Loading branch information
LadDeep committed Apr 7, 2023
1 parent 50e9439 commit 2816540
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 33 deletions.
5 changes: 4 additions & 1 deletion src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ public ReadEndsForMarkDuplicates buildReadEnds(final SAMFileHeader header, final
* Goes through the accumulated ReadEndsForMarkDuplicates objects and determines which of them are
* to be marked as duplicates.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){
final int entryOverhead;
if (TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
Expand All @@ -735,6 +735,9 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in
maxInMemory,
TMP_DIR);
}
}
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
sortIndicesForDuplicates(indexOpticalDuplicates);

ReadEndsForMarkDuplicates firstOfNextChunk = null;
final List<ReadEndsForMarkDuplicates> nextChunk = new ArrayList<>(200);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,9 @@
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SortingLongCollection;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.sam.markduplicates.util.RepresentativeReadIndexerCodec;
import picard.sam.util.RepresentativeReadIndexer;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

/**
Expand Down Expand Up @@ -86,32 +80,7 @@ private void validateFlowParameteres() {
* applicable for flow mode invocation.
*/
public void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
final int entryOverhead;
if (md.TAG_DUPLICATE_SET_MEMBERS) {
// Memory requirements for RepresentativeReadIndexer:
// three int entries + overhead: (3 * 4) + 4 = 16 bytes
entryOverhead = 16;
} else {
entryOverhead = SortingLongCollection.SIZEOF;
}
// Keep this number from getting too large even if there is a huge heap.
int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5));
// If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections
if (indexOpticalDuplicates) {
maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead);
md.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
}
log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
md.duplicateIndexes = new SortingLongCollection(maxInMemory, md.TMP_DIR.toArray(new File[md.TMP_DIR.size()]));
if (md.TAG_DUPLICATE_SET_MEMBERS) {
final RepresentativeReadIndexerCodec representativeIndexCodec = new RepresentativeReadIndexerCodec();
md.representativeReadIndicesForDuplicates = SortingCollection.newInstance(RepresentativeReadIndexer.class,
representativeIndexCodec,
Comparator.comparing(read -> read.readIndexInFile),
maxInMemory,
md.TMP_DIR);
}

md.sortIndicesForDuplicates(indexOpticalDuplicates);
// this code does not support pairs at this time
if ( md.pairSort.iterator().hasNext() ) {
throw new IllegalArgumentException("Flow based code does not support paired reads");
Expand Down

0 comments on commit 2816540

Please sign in to comment.