Skip to content

Commit

Permalink
Making optimizations to Funcotator
Browse files Browse the repository at this point in the history
* improving slow String handling
* enabling feature caching
  • Loading branch information
lbergelson committed May 7, 2018
1 parent e331de3 commit ff3f50c
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -809,14 +809,15 @@ record = header.getProgramRecord(pgID);
*/
protected FeatureInput<? extends Feature> addFeatureInputsAfterInitialization(final String filePath,
final String name,
final Class<? extends Feature> featureType) {
final Class<? extends Feature> featureType,
final int featureQueryLookahead) {

final FeatureInput<? extends Feature> featureInput = new FeatureInput<>(name + FeatureInput.FEATURE_ARGUMENT_TAG_DELIMITER + filePath);

//Add datasource to the feature manager too so that it can be queried.
//Note: the lookahead is supplied by the caller via featureQueryLookahead. Pass 0 to avoid caching,
//e.g. for windowed queries that need to "look behind" as well.
features.addToFeatureSources(
0,
featureQueryLookahead,
featureInput,
featureType,
cloudPrefetchBuffer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ public class Funcotator extends VariantWalker {
* The current version of {@link Funcotator}.
*/
public static final String VERSION = "0.0.2";
/**
 * Feature-query lookahead that {@link Funcotator} supplies when it registers locatable
 * data sources as feature inputs (see {@code addFeaturesForLocatableDataSource}).
 * NOTE(review): presumably measured in bases; confirm against the feature manager.
 */
public static final int FEATURE_QUERY_LOOKAHEAD = 100000;

//==================================================================================================================
// Arguments:
Expand Down Expand Up @@ -580,7 +581,8 @@ private void addFeaturesForLocatableDataSource( final Path dataSourceFile,
IOUtils.getPath( dataSourceProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE) )
).toUri().toString(),
name,
featureClazz
featureClazz,
FEATURE_QUERY_LOOKAHEAD
);

// Add our feature input to our list of manual inputs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,9 @@ public FeatureCodecHeader readHeader(final LineIterator lineIterator) throws IOE
return new FeatureCodecHeader(readActualHeader(lineIterator), FeatureCodecHeader.NO_HEADER_END);
}

/**
 * Builds the line-oriented, position-aware source used when indexing this codec's input.
 *
 * @param bufferedInputStream the stream to read lines from
 * @return an {@link AsciiLineReaderIterator} over {@code bufferedInputStream}
 */
@Override
public LocationAware makeIndexableSourceFromStream(final InputStream bufferedInputStream) {
    // Delegate stream wrapping to AsciiLineReader.from instead of building a
    // PositionalBufferedStream by hand for the deprecated AsciiLineReader constructor.
    // This also removes the need for @SuppressWarnings("deprecation") and the
    // unreachable second return statement.
    return new AsciiLineReaderIterator(AsciiLineReader.from(bufferedInputStream));
}

// ============================================================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.Feature;
import htsjdk.tribble.annotation.Strand;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.UserException;
Expand Down Expand Up @@ -108,23 +109,24 @@ protected GencodeGtfFeature(final String[] gtfFields) {
// But we need to match up the field names to the fields themselves:
for ( final String extraField : extraFields ) {

final String[] fieldParts = extraField.trim().split(EXTRA_FIELD_KEY_VALUE_SPLITTER);

if ( fieldParts.length == 1 ){
if ( fieldParts[EXTRA_FIELD_KEY_INDEX].isEmpty() ){
continue;
}
else {
throw new UserException.MalformedFile("Extraneous optional field data - not in a key/value pair: " + extraField);
}
final String trimmedExtraField = extraField.trim();
if (trimmedExtraField.isEmpty()) {
continue;
}

// Each optional field is in a key/value pair:
final String fieldName = fieldParts[EXTRA_FIELD_KEY_INDEX].trim();

// The value of the field may be between two quotes.
// We remove them here.
final String fieldValue = fieldParts[EXTRA_FIELD_VALUE_INDEX].trim().replaceAll("\"", "");
final int splitPoint = trimmedExtraField.indexOf(EXTRA_FIELD_KEY_VALUE_SPLITTER);
final String fieldName;
final String fieldValue;
if(splitPoint == -1 ) {
throw new UserException.MalformedFile("Extraneous optional field data - not in a key/value pair: " + extraField);
} else {
fieldName = trimmedExtraField.substring(0,splitPoint).trim();

// The value of the field may be between two quotes.
// We remove them here.
final String rawFieldValue = trimmedExtraField.substring(splitPoint + 1, trimmedExtraField.length());
fieldValue = StringUtils.remove(rawFieldValue.trim(), '"');
}

OptionalField<?> optionalField = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,20 @@ public void metaTestEnsureTempDirs() {
Assert.assertEquals(doDebugTests, false);
}

/**
 * Ad-hoc local run of Funcotator (VCF output, hg19) over the interval 1:1-10000000.
 *
 * Disabled by default: the data sources, reference and input VCF below are absolute paths
 * on a single developer's machine, so this test cannot pass anywhere else (including CI).
 * It makes no assertions; it only checks that the command line completes without throwing.
 * Re-enable locally, with paths adjusted, when profiling.
 */
@Test(enabled = false)
public void testLocalThing(){
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addArgument("output-file-format", "VCF");
    args.addArgument("ref-version", "hg19");
    args.addArgument("data-sources-path", "/Users/louisb/data/funcotator/funcotator_dataSources.v1.2.20180329/");
    args.addReference(new File("/Users/louisb/data/Homo_sapiens_assembly19.fasta"));
    args.addOutput(createTempFile("out",".vcf"));
    args.addVCF(new File("/Users/louisb/Workspace/gatk/0816201804HC0_R01C01.vcf.gz"));
    args.addArgument("L", "1:1-10000000");
    runCommandLine(args);
}

@Test(dataProvider = "provideForIntegrationTest")
public void testFuncotatorWithoutValidatingResults(final String dataSourcesPath,
final String refVer,
Expand Down

0 comments on commit ff3f50c

Please sign in to comment.