Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CondenseDepthEvidence tool #7926

Merged
merged 1 commit into from
Jul 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package org.broadinstitute.hellbender.tools.sv;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.Feature;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec;
import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodecFinder;
import org.broadinstitute.hellbender.utils.codecs.FeatureSink;

/**
* <p>Combines adjacent intervals in DepthEvidence files.</p>
*
* <h3>Inputs</h3>
*
* <ul>
* <li>
* A locus-sorted DepthEvidence file (name ends with ".rd.txt", ".rd.txt.gz", or "rd.bci").
* </li>
* </ul>
*
* <h3>Output</h3>
*
* <ul>
* <li>
* An output file containing merged evidence from the inputs.
* </li>
* </ul>
*
* <h3>Usage example</h3>
*
* <pre>
* gatk CondenseDepthEvidence \
* -F input.rd.txt.gz \
* -O merged.rd.txt.gz
* </pre>
*
* @author Ted Sharpe &lt;[email protected]&gt;
*/
@CommandLineProgramProperties(
summary = "Merges adjacent DepthEvidence records.",
oneLineSummary = "Merges adjacent DepthEvidence records.",
programGroup = StructuralVariantDiscoveryProgramGroup.class
)
@ExperimentalFeature
public class CondenseDepthEvidence extends FeatureWalker<DepthEvidence> {
public static final String INPUT_ARGNAME = "depth-evidence";
public static final String COMPRESSION_LEVEL_ARGNAME = "compression-level";
public static final String MAX_INTERVAL_SIZE_ARGNAME = "max-interval-size";
public static final String MIN_INTERVAL_SIZE_ARGNAME = "min-interval-size";

@Argument(
doc = "DepthEvidence input file",
fullName = INPUT_ARGNAME,
shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME
)
private GATKPath inputPath;

@Argument(
doc = "Merged DepthEvidence output file",
fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME
)
private GATKPath outputPath;

@Argument(
doc = "Maximum interval size",
fullName = MAX_INTERVAL_SIZE_ARGNAME,
optional = true
)
private int maxIntervalLength = 1000;

@Argument(
doc = "Minimum interval size",
fullName = MIN_INTERVAL_SIZE_ARGNAME,
optional = true
)
private int minIntervalLength = 0;

@Argument(
doc = "Output compression level",
fullName = COMPRESSION_LEVEL_ARGNAME,
minValue = 0, maxValue = 9, optional = true
)
private int compressionLevel = 4;

private FeatureSink<DepthEvidence> outputSink;
private DepthEvidence accumulator;


@Override
public GATKPath getDrivingFeaturePath() {
return inputPath;
}

@Override
protected boolean isAcceptableFeatureType( final Class<? extends Feature> featureType ) {
return featureType.equals(DepthEvidence.class);
}

@Override
@SuppressWarnings("unchecked")
public void onTraversalStart() {
super.onTraversalStart();

if ( minIntervalLength > maxIntervalLength ) {
throw new UserException("Minimum interval length exceeds maximum interval length.");
}

final FeatureOutputCodec<? extends Feature, ? extends FeatureSink<? extends Feature>> codec =
FeatureOutputCodecFinder.find(outputPath);
final Class<? extends Feature> codecFeatureClass = codec.getFeatureType();
if ( !codecFeatureClass.equals(DepthEvidence.class) ) {
throw new UserException("Output file " + outputPath + " implies Feature subtype " +
codecFeatureClass.getSimpleName() +
", but this tool expects to write DepthEvidence.");
}
final SVFeaturesHeader header = (SVFeaturesHeader)getDrivingFeaturesHeader();
final SAMSequenceDictionary dict =
header.getDictionary() != null ? header.getDictionary() : getBestAvailableSequenceDictionary();
outputSink = (FeatureSink<DepthEvidence>)codec.makeSink(outputPath, dict,
header.getSampleNames(), compressionLevel);
}

@Override
public void apply( final DepthEvidence feature,
final ReadsContext readsContext,
final ReferenceContext referenceContext,
final FeatureContext featureContext ) {
if ( accumulator == null ) {
accumulator = feature;
return;
}
final int intervalLength = accumulator.getLengthOnReference();
if ( !isAdjacent(accumulator, feature) || intervalLength >= maxIntervalLength ) {
if ( intervalLength >= minIntervalLength ) {
outputSink.write(accumulator);
}
accumulator = feature;
return;
}
final int[] accumCounts = accumulator.getCounts();
MathUtils.addToArrayInPlace(accumCounts, feature.getCounts());
accumulator = new DepthEvidence(feature.getContig(), accumulator.getStart(),
feature.getEnd(), accumCounts);
}

@Override
public Object onTraversalSuccess() {
super.onTraversalSuccess();
if ( accumulator != null && accumulator.getLengthOnReference() >= minIntervalLength ) {
outputSink.write(accumulator);
}
outputSink.close();
return null;
}

private static boolean isAdjacent( final DepthEvidence ev1, final DepthEvidence ev2 ) {
return ev1.getContig().equals(ev2.getContig()) && ev1.getEnd() + 1 == ev2.getStart();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@ public void write(final F feature) {
@Override
public void close() {
try {
outputStream.close(); // do this first so that the timestamp on the index will be later
if (indexCreator != null) {
final Index index = indexCreator.finalizeIndex(locationAware.getPosition());
index.writeBasedOnFeaturePath(featurePath);
}
outputStream.close();
} catch (final IOException e) {
throw new GATKException("Error closing output", e);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.broadinstitute.hellbender.tools.sv;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Log;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.Collections;

public class CondenseDepthEvidenceIntegrationTest extends CommandLineProgramTest {
public static final String printEvidenceTestDir = toolsTestDir + "walkers/sv/printevidence/";
public static final String sequenceDict = largeFileTestDir + "human_g1k_v37.20.21.dict";

@Test
public void positiveTest() throws IOException {
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, sequenceDict);
argsBuilder.add(CondenseDepthEvidence.MIN_INTERVAL_SIZE_ARGNAME, 101);
argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, printEvidenceTestDir + "output.test_hg38.rd.txt");
argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");
final IntegrationTestSpec testSpec =
new IntegrationTestSpec(argsBuilder.getString(),
Collections.singletonList(printEvidenceTestDir + "merged.rd.txt"));
testSpec.setOutputFileExtension("rd.txt");
testSpec.executeTest("positive test", this);
}

@Test
public void negativeTest() throws IOException {
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name());
argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, sequenceDict);
argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, printEvidenceTestDir + "no_condense.rd.txt");
argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s");

// output equal to input, because this file contains rows that can't be merged for various reasons
final IntegrationTestSpec testSpec =
new IntegrationTestSpec(argsBuilder.getString(),
Collections.singletonList(printEvidenceTestDir + "no_condense.rd.txt"));
testSpec.setOutputFileExtension("rd.txt");
testSpec.executeTest("negative test", this);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#Chr Start End HG00096 HG00129 HG00140
chr22 30499964 30500964 217 182 243
chr22 30500964 30501964 242 175 218
chr22 30501964 30502964 244 220 261
chr22 30502964 30503964 261 223 243
chr22 30503964 30504964 224 204 234
chr22 30504964 30505964 313 217 307
chr22 30505964 30506964 241 197 247
chr22 30506964 30507964 240 226 238
chr22 30507964 30508964 295 240 262
chr22 30508964 30509964 234 179 206
chr22 30509964 30510964 212 214 250
chr22 30510964 30511964 243 210 247
chr22 30511964 30512964 263 206 231
chr22 30512964 30513964 235 202 231
chr22 30513964 30514964 286 237 290
chr22 30514964 30515964 252 208 241
chr22 30515964 30516964 192 203 200
chr22 30516964 30517964 253 185 225
chr22 30517964 30518964 226 188 243
chr22 30518964 30519964 208 166 204
chr22 30519964 30520964 253 230 202
chr22 30520964 30521964 252 279 257
chr22 30521964 30522964 229 189 229
chr22 30522964 30523964 234 201 235
chr22 30523964 30524964 270 217 215
chr22 30524964 30525964 313 239 267
chr22 30525964 30526964 301 202 245
chr22 30526964 30527964 206 237 223
chr22 30527964 30528964 260 203 272
chr22 30528964 30529964 248 195 295
chr22 30529964 30530964 239 193 241
chr22 30530964 30531964 267 236 265
chr22 30531964 30532964 306 238 317
chr22 30532964 30533964 283 238 274
chr22 30533964 30534964 243 177 294
chr22 30534964 30535964 257 220 287
chr22 30535964 30536964 229 204 210
chr22 30536964 30537964 275 210 275
chr22 30537964 30538964 201 227 193
chr22 30538964 30539964 243 219 234
chr22 30539964 30540964 263 213 250
chr22 30540964 30541964 270 242 261
chr22 30541964 30542964 281 197 254
chr22 30542964 30543964 271 197 249
chr22 30543964 30544964 233 226 246
chr22 30544964 30545964 245 210 233
chr22 30545964 30546964 218 149 229
chr22 30546964 30547964 250 165 287
chr22 30547964 30548964 241 215 243
chr22 30548964 30549964 262 248 285
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#Chr Start End HG00096 HG00129 HG00140
chr20 30499964 30500964 217 182 243
chr20 30500963 30501964 242 175 218
chr20 30501965 30502964 244 220 261
chr21 30502964 30503964 261 223 243
chr21 30504000 30505000 261 223 243
chr21 30505000 30506000 261 223 243