diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 7c23bf973af..f1bb4ce5bd2 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -821,6 +821,17 @@ record = header.getProgramRecord(pgID); */ protected String getToolkitShortName() { return "GATK"; } + /** + * Call {@link GATKTool#addFeatureInputsAfterInitialization(String, String, Class, int)} with no caching. + * + * @return The {@link FeatureInput} used as the key for this data source. + */ + protected FeatureInput addFeatureInputsAfterInitialization(final String filePath, final String name, + final Class featureType) { + + return addFeatureInputsAfterInitialization(filePath, name, featureType, 0); + } + /** * A method to allow a user to inject data sources after initialization that were not specified as command-line * arguments. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java index df62edc828a..ca78a682204 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java @@ -79,7 +79,7 @@ public AnnotatedIntervalHeader readActualHeader(final LineIterator reader) { @Override public boolean canDecode(final String path) { - return (path.endsWith(".seg") && xsvLocatableTableCodec.canDecodeMinusExtensionChecks(path)); + return (path.endsWith(".seg") || path.endsWith(".maf") || path.endsWith(".maf.annotated")) && xsvLocatableTableCodec.canDecodeMinusExtensionChecks(path); } /** diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/DataSourceFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/DataSourceFuncotationFactory.java index b00ef7f4f18..4132b9c5810 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/DataSourceFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/DataSourceFuncotationFactory.java @@ -128,11 +128,13 @@ public List createFuncotations(final VariantContext variant, final // Get the features that this funcotation factory is responsible for: final List featureList = getFeatureListFromMap(featureSourceMap); + final List outputFuncotations; + // If our featureList is compatible with this DataSourceFuncotationFactory, then we make our funcotations: if ( isFeatureListCompatible(featureList) ) { // Create our funcotations: - final List outputFuncotations; + if ( gencodeFuncotations == null ) { outputFuncotations = createFuncotationsOnVariant(variant, referenceContext, featureList); } @@ -142,12 +144,16 @@ public List createFuncotations(final VariantContext variant, final // Set our overrides: setOverrideValuesInFuncotations(outputFuncotations); - - return outputFuncotations; } else { return createDefaultFuncotationsOnVariant(variant, referenceContext); } + + if ((outputFuncotations == null) || (outputFuncotations.size() == 0)) { + return createDefaultFuncotationsOnVariant(variant, referenceContext); + } else { + return outputFuncotations; + } } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java index 51ef5cc6e8d..ad2b9dc43b5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotation.java @@ -72,4 +72,9 @@ default void setFieldSerializationOverrideValues(final Map fieldS default String 
serializeToVcfString(final String manualAnnotationString) { return (manualAnnotationString == null ? "" : manualAnnotationString) + serializeToVcfString(); } + + /** + * @return Return whether the field exists in this {@link Funcotation}. + */ + boolean hasField(final String fieldName); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotationMap.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotationMap.java new file mode 100644 index 00000000000..117e0b7da9a --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotationMap.java @@ -0,0 +1,208 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +import htsjdk.variant.variantcontext.Allele; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; +import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + + +/** + * A linked map of transcript IDs to funcotations. Also, supports some querying. + * + * Supports ordering of the transcripts. + * + * Multiple GencodeFuncotations for the same transcript are prohibited. + * + * Not thread-safe. + */ +public class FuncotationMap { + + public final static String NO_TRANSCRIPT_AVAILABLE_KEY = "no_transcript"; + + /** Standard Logger. 
*/ + protected static final Logger logger = LogManager.getLogger(FuncotationMap.class); + + + final private Map> txToFuncotations = new LinkedHashMap<>(); + + private FuncotationMap() {} + + private static boolean isGencodeFuncotation(final Funcotation f) { + return f instanceof GencodeFuncotation; + } + + /** + * @param transcriptId the specified transcript ID. Use {@link #NO_TRANSCRIPT_AVAILABLE_KEY} if there are no transcripts. Never {@code null} + * @return A list of the Gencode Funcotations only. Empty list, if nothing found. Never {@code null} + */ + public List getGencodeFuncotations(final String transcriptId) { + Utils.nonNull(transcriptId); + return txToFuncotations.getOrDefault(transcriptId, new LinkedHashSet<>()).stream() + .filter(FuncotationMap::isGencodeFuncotation).map(f-> (GencodeFuncotation) f).collect(Collectors.toList()); + } + + /** + * Get all funcotations for the given transcript. Note that the alleles will be mixed. + * @param transcriptId the specified transcript ID. Use {@link #NO_TRANSCRIPT_AVAILABLE_KEY} if there are no transcripts. + * @return Empty list, if nothing found. Never {@code null} + */ + public List get(final String transcriptId) { + Utils.nonNull(transcriptId); + return new ArrayList<>(txToFuncotations.getOrDefault(transcriptId, new LinkedHashSet<>())); + } + + /** + * @param transcriptId the specified transcript ID. Use {@link #NO_TRANSCRIPT_AVAILABLE_KEY} if there are no transcripts. Never {@code null} + * @param fieldName The field name to search. Never {@code null} + * @param allele Only return fields from funcotations with the specified allele. Never {@code null} + * @return Value of the given field for the transcript ID and allele. Return {@code null} if field not found. 
+ */ + public String getFieldValue(final String transcriptId, final String fieldName, final Allele allele) { + Utils.nonNull(transcriptId); + Utils.nonNull(fieldName); + Utils.nonNull(allele); + final List values = txToFuncotations.getOrDefault(transcriptId, new LinkedHashSet<>()).stream() + .filter(f -> f.hasField(fieldName)) + .filter(f -> f.getAltAllele().equals(allele)) + .map(f -> f.getField(fieldName)) + .collect(Collectors.toList()); + if (values.size() > 1) { + throw new GATKException.ShouldNeverReachHereException("Found more than one value for " + transcriptId + ", " + + allele + ", " + fieldName); + } + if (values.size() == 0) { + return null; + } else { + return values.get(0); + } + } + + /** + * @return An empty FuncotationMap. Never {@code null} + */ + private static FuncotationMap createEmpty() { + return new FuncotationMap(); + } + + /** + * @return A FuncotationMap with only one transcript ID, which is {@link #NO_TRANSCRIPT_AVAILABLE_KEY}. Never {@code null} + */ + public static FuncotationMap createNoTranscriptInfo(final List funcotations) { + final FuncotationMap result = createEmpty(); + result.add(NO_TRANSCRIPT_AVAILABLE_KEY, funcotations); + return result; + } + + /** This method checks for transcript duplications in the list of GencodeFuncotations. + * Recommended to gather all gencode funcotations before calling this method. + * @param gencodeFuncotations Never {@code null} + * @return A new FuncotationMap created only from the given list of GencodeFuncotations. 
Never {@code null} + */ + public static FuncotationMap createFromGencodeFuncotations(final List gencodeFuncotations) { + + Utils.nonNull(gencodeFuncotations); + Utils.validateArg(!areDuplicateTranscriptIDsFound(gencodeFuncotations), "Duplicate transcript ID entries were found in input: " + + gencodeFuncotations.stream().map(gf -> gf.getAnnotationTranscript()).collect(Collectors.joining(","))); + final FuncotationMap result = createEmpty(); + gencodeFuncotations.forEach(f -> result.add(f.getAnnotationTranscript(), f)); + return result; + } + + /** + * Add the given funcotations to the given transcript ID. + * + * @param txId Never {@code null} + * @param funcotations Never {@code null} + */ + public void add(final String txId, final List funcotations) { + Utils.nonNull(txId); + Utils.nonNull(funcotations); + + final LinkedHashSet existingFuncotationsToUpdate = txToFuncotations.getOrDefault(txId, new LinkedHashSet<>()); + existingFuncotationsToUpdate.addAll(funcotations); + txToFuncotations.put(txId, existingFuncotationsToUpdate); + } + /** Add the given funcotation to the given transcript ID. + * + * @param txId Never {@code null} + * @param funcotation Never {@code null} + */ + public void add(final String txId, final Funcotation funcotation) { + Utils.nonNull(txId); + Utils.nonNull(funcotation); + final LinkedHashSet existingFuncotationsToUpdate = txToFuncotations.getOrDefault(txId, new LinkedHashSet<>()); + existingFuncotationsToUpdate.add(funcotation); + txToFuncotations.put(txId, existingFuncotationsToUpdate); + } + + /** + * Get the key list (i.e. transcript IDs) in order. + * + * @return Never {@code null} + */ + public List keyList() { + return new ArrayList<>(txToFuncotations.keySet()); + } + + /** Create a FuncotationMap where all Funcotations will be TableFuncotations. This is useful for parsing existing + * VCFs. + * Only renders for a single allele. 
+ * See {@link FuncotatorUtils#extractFuncotatorKeysFromHeaderDescription(String)} for getting the funcotation keys. + * + * @param transcriptFieldName The field name to use for transcript IDs. Use {@link #NO_TRANSCRIPT_AVAILABLE_KEY} if unknown. + * If not in the funcotation keys, then the Funcotation map will be created with one transcript ID, {@link #NO_TRANSCRIPT_AVAILABLE_KEY}. + * Never {@code null} + * @param funcotationKeys The ordered keys of the funcotation field. Never {@code null} + * @param funcotationAttributeForSingleAllele The funcotation attribute from a VCF. Never {@code null} + * @param altAllele The alternate allele for the created funcotations. Never {@code null} + * @param datasourceName The datasource name to use for all of the created funcotations. Never {@code null} + * @return a funcotation map. Note that no funcotations will be GencodeFuncotations. Never {@code null} + */ + public static FuncotationMap createAsAllTableFuncotationsFromVcf(final String transcriptFieldName, final String[] funcotationKeys, + final String funcotationAttributeForSingleAllele, final Allele altAllele, + final String datasourceName) { + Utils.nonNull(transcriptFieldName); + Utils.nonNull(funcotationKeys); + Utils.nonNull(funcotationAttributeForSingleAllele); + Utils.nonNull(altAllele); + Utils.nonNull(datasourceName); + + final FuncotationMap result = createEmpty(); + final String[] funcotationAttributeForSingleAlleleByTranscript = StringUtils.splitByWholeSeparator(funcotationAttributeForSingleAllele, "]#["); + + for (final String funcotationAttribute : funcotationAttributeForSingleAlleleByTranscript) { + final String[] values = StringUtils.splitByWholeSeparatorPreserveAllTokens(funcotationAttribute, "|"); + if (values[0].startsWith(VcfOutputRenderer.START_TRANSCRIPT_DELIMITER)) { + values[0] = values[0].replace(VcfOutputRenderer.START_TRANSCRIPT_DELIMITER, ""); + } + if (values[values.length - 1].endsWith(VcfOutputRenderer.END_TRANSCRIPT_DELIMITER)) { + 
values[values.length - 1] = values[values.length - 1].replace(VcfOutputRenderer.END_TRANSCRIPT_DELIMITER, ""); + } + if (values.length != funcotationKeys.length) { + logger.error("Keys: " + StringUtils.join(funcotationKeys, ", ")); + logger.error("Values: " + StringUtils.join(values, ", ")); + throw new GATKException.ShouldNeverReachHereException("Cannot parse the funcotation attribute. Num values: " + values.length + " Num keys: " + funcotationKeys.length); + } + final Map simpleNameValuePairs = IntStream.range(0, values.length).boxed().collect(Collectors + .toMap(i -> funcotationKeys[i], i-> values[i])); + + final List valuesAsList = Arrays.asList(funcotationKeys).stream().map(k -> simpleNameValuePairs.get(k)).collect(Collectors.toList()); + result.add(simpleNameValuePairs.getOrDefault(transcriptFieldName, NO_TRANSCRIPT_AVAILABLE_KEY), new TableFuncotation(Arrays.asList(funcotationKeys), valuesAsList, altAllele, datasourceName)); + + } + return result; + } + + private static boolean areDuplicateTranscriptIDsFound(final List gencodeFuncotations) { + return gencodeFuncotations.size() != new HashSet<>(gencodeFuncotations).size(); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java index ee6fbcb463e..f749fb39e58 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java @@ -3,7 +3,6 @@ import htsjdk.tribble.Feature; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.broadinstitute.barclay.argparser.Argument; @@ -31,6 +30,7 @@ import java.nio.file.Path; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * 
Funcotator (FUNCtional annOTATOR) analyzes given variants for their function (as retrieved from a set of data sources) and produces the analysis in a specified output file. @@ -264,21 +264,21 @@ public class Funcotator extends VariantWalker { optional = true, doc = "File to use as a list of transcripts (one transcript ID per line, version numbers are ignored) OR A set of transcript IDs to use for annotation to override selected transcript." ) - protected Set userTranscriptIdSet = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_DEFAULT_VALUE; + protected Set userTranscriptIdSet = new HashSet<>(); @Argument( fullName = FuncotatorArgumentDefinitions.ANNOTATION_DEFAULTS_LONG_NAME, optional = true, doc = "Annotations to include in all annotated variants if the annotation is not specified in the data sources (in the format :). This will add the specified annotation to every annotated variant if it is not already present." ) - protected List annotationDefaults = FuncotatorArgumentDefinitions.ANNOTATION_DEFAULTS_DEFAULT_VALUE; + protected List annotationDefaults = new ArrayList<>(); @Argument( fullName = FuncotatorArgumentDefinitions.ANNOTATION_OVERRIDES_LONG_NAME, optional = true, doc = "Override values for annotations (in the format :). Replaces existing annotations of the given name with given values." ) - protected List annotationOverrides = FuncotatorArgumentDefinitions.ANNOTATION_OVERRIDES_DEFAULT_VALUE; + protected List annotationOverrides = new ArrayList<>(); @Argument( fullName = FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_LONG_NAME, @@ -295,6 +295,13 @@ public class Funcotator extends VariantWalker { ) protected int lookaheadFeatureCachingInBp = FuncotatorArgumentDefinitions.LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE; + @Argument( + fullName = FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME, + optional = true, + doc = "(Advanced/Use at your own risk) Use in conjunction with allow hg19 contig names with b37. 
If you also select this flag, no check that your input reference is b37 is actually performed. Otherwise, ignored. Typically, this option is useful in integration tests (written by devs) only." + ) + protected boolean allowHg19ContigNamesWithB37Lenient = false; + //================================================================================================================== private OutputRenderer outputRenderer; @@ -331,7 +338,7 @@ public void onTraversalStart() { final Map configData = DataSourceUtils.getAndValidateDataSourcesFromPaths(referenceVersion, dataSourceDirectories); initializeManualFeaturesForLocatableDataSources(configData); dataSourceFactories.addAll( - DataSourceUtils.createDataSourceFuncotationFactoriesForDataSources(configData, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet) + DataSourceUtils.createDataSourceFuncotationFactoriesForDataSources(configData, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet, allowHg19ContigNamesWithB37) ); // Sort our data source factories to ensure they're always in the same order: gencode datasources first @@ -452,31 +459,23 @@ private void enqueueAndHandleVariant(final VariantContext variant, final Referen // Get our feature inputs: final Map> featureSourceMap = new HashMap<>(); - // Create a variant context for annotation that has a new contig based on whether we need to overwrite the - // contig names in the next section. - VariantContext variantContextFixedContigForDataSources = variant; - // Check to see if we need to query with a different reference convention (i.e. "chr1" vs "1"). 
- if (allowHg19ContigNamesWithB37 && inputReferenceIsB37) { - final VariantContextBuilder variantContextBuilderForFixedContigForDataSources = new VariantContextBuilder(variant); + if (allowHg19ContigNamesWithB37 && (inputReferenceIsB37 || allowHg19ContigNamesWithB37Lenient)) { // Construct a new contig and new interval with no "chr" in front of it: final String hg19Contig = FuncotatorUtils.convertB37ContigToHg19Contig( variant.getContig() ); final SimpleInterval hg19Interval = new SimpleInterval(hg19Contig, variant.getStart(), variant.getEnd()); - variantContextBuilderForFixedContigForDataSources.chr(hg19Contig); - // Get our features for the new interval: for ( final FeatureInput featureInput : manualLocatableFeatureInputs ) { @SuppressWarnings("unchecked") final List featureList = (List)featureContext.getValues(featureInput, hg19Interval); - - // TODO: This is a little sloppy, since it checks every datasource twice. Once for hg19 contig names and once for b37 contig names. See https://github.com/broadinstitute/gatk/issues/4798 - featureList.addAll(featureContext.getValues(featureInput)); + if (featureList.size() == 0) { + // TODO: This is a little sloppy, since it checks every datasource twice. Once for hg19 contig names and once for b37 contig names. See https://github.com/broadinstitute/gatk/issues/4798 + featureList.addAll(featureContext.getValues(featureInput)); + } featureSourceMap.put( featureInput.getName(), featureList); } - // Get our VariantContext for annotation: - variantContextFixedContigForDataSources = variantContextBuilderForFixedContigForDataSources.make(); } else { for ( final FeatureInput featureInput : manualLocatableFeatureInputs ) { @@ -486,29 +485,39 @@ private void enqueueAndHandleVariant(final VariantContext variant, final Referen } } - // Create a place to keep our funcotations: - final List funcotations = new ArrayList<>(); + // Create only the gencode funcotations. 
+ if (retriveGencodeFuncotationFactoryStream().count() > 1) { + logger.warn("Attempting to annotate with more than one GENCODE datasource. This is not supported."); + } + + final List transcriptFuncotations = retriveGencodeFuncotationFactoryStream() + .map(gf -> gf.createFuncotations(variant, referenceContext, featureSourceMap)) + .flatMap(List::stream) + .map(gf -> (GencodeFuncotation) gf).collect(Collectors.toList()); - // Annotate with Gencode first: - // Create a list of GencodeFuncotation to use for other Data Sources: - final List gencodeFuncotations = new ArrayList<>(); + // Create a place to keep our funcotations: + final FuncotationMap funcotationMap = FuncotationMap.createFromGencodeFuncotations(transcriptFuncotations); - // Perform the actual annotation. Note that we leverage the ordering of datasources here (i.e. that gencode/transcript - // datasources always appear first) + // Perform the rest of the annotation. Note that this code manually excludes the Gencode Funcotations. for (final DataSourceFuncotationFactory funcotationFactory : dataSourceFactories ) { - if (funcotationFactory.getType().equals(FuncotatorArgumentDefinitions.DataSourceType.GENCODE)) { - final List funcotationsFromGencodeFactory = funcotationFactory.createFuncotations(variantContextFixedContigForDataSources, referenceContext, featureSourceMap); - funcotations.addAll(funcotationsFromGencodeFactory); - gencodeFuncotations.addAll( - funcotationsFromGencodeFactory.stream() - .map(x -> (GencodeFuncotation)x) - .collect(Collectors.toList())); - } else { - funcotations.addAll( funcotationFactory.createFuncotations(variantContextFixedContigForDataSources, referenceContext, featureSourceMap, gencodeFuncotations) ); + + // Note that this guarantees that we do not add GencodeFuncotations a second time. 
+ if (!funcotationFactory.getType().equals(FuncotatorArgumentDefinitions.DataSourceType.GENCODE)) { + final List txIds = funcotationMap.keyList(); + + for (final String txId: txIds) { + funcotationMap.add(txId, funcotationFactory.createFuncotations(variant, referenceContext, featureSourceMap, funcotationMap.getGencodeFuncotations(txId))); + } } } - outputRenderer.write(variant, funcotations); + // At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected + outputRenderer.write(variant, funcotationMap); + } + + private Stream retriveGencodeFuncotationFactoryStream() { + return dataSourceFactories.stream() + .filter(f -> f.getType().equals(FuncotatorArgumentDefinitions.DataSourceType.GENCODE)); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java index 03136df1f64..6db180434d5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java @@ -6,7 +6,7 @@ import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; import java.nio.file.Path; -import java.util.*; +import java.util.Properties; /** * Class to store argument definitions specific to {@link Funcotator}. @@ -33,14 +33,21 @@ public class FuncotatorArgumentDefinitions { public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode"; public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL; + /** + * Do not give this a static default value or the integration tests will get hosed. 
+ */ public static final String TRANSCRIPT_LIST_LONG_NAME = "transcript-list"; - public static final Set TRANSCRIPT_LIST_DEFAULT_VALUE = new HashSet<>(); + /** + * Do not give this a static default value or the integration tests will get hosed. + */ public static final String ANNOTATION_DEFAULTS_LONG_NAME = "annotation-default"; - public static final List ANNOTATION_DEFAULTS_DEFAULT_VALUE = new ArrayList<>(); + /** + * Do not give this a static default value or the integration tests will get hosed. + */ public static final String ANNOTATION_OVERRIDES_LONG_NAME = "annotation-override"; - public static final List ANNOTATION_OVERRIDES_DEFAULT_VALUE = new ArrayList<>(); + public static final String ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_LONG_NAME = "allow-hg19-gencode-b37-contig-matching"; @@ -50,6 +57,8 @@ public class FuncotatorArgumentDefinitions { public static final String LOOKAHEAD_CACHE_IN_BP_NAME = "lookahead-cache-bp"; public static final int LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE = VariantWalkerBase.FEATURE_CACHE_LOOKAHEAD; + public static final String ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME = "allow-hg19-gencode-b37-contig-matching-override"; + // ------------------------------------------------------------ // Helper Types: diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java index 82275142a8b..63c97d7d964 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java @@ -8,11 +8,13 @@ import htsjdk.tribble.annotation.Strand; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.broadinstitute.hellbender.engine.ReferenceContext; import 
org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; +import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.Resource; @@ -25,6 +27,7 @@ import java.io.IOException; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.IntStream; public final class FuncotatorUtils { @@ -1998,4 +2001,55 @@ public TranscriptCodingSequenceException( final String msg, final Throwable thro super(msg, throwable); } } + + /** + * @param funcotationHeaderDescription The raw description of the funcotation info field. Never {@code null} + * @return Array of the keys, in proper order. Never {@code null} + */ + public static String[] extractFuncotatorKeysFromHeaderDescription(final String funcotationHeaderDescription) { + Utils.nonNull(funcotationHeaderDescription); + + final String[] descriptionSplit = StringUtils.splitByWholeSeparatorPreserveAllTokens(funcotationHeaderDescription, ": "); + return StringUtils.splitByWholeSeparatorPreserveAllTokens(descriptionSplit[1], "|"); + } + + /** + * Make sure that an individual funcotation (i.e. single value of a funcotation) is sanitized for VCF consumption. + * Particularly, make sure that it does not allow special characters that would interfere with VCF parsing. + * @param individualFuncotation value from a funcotation Never {@code null} + * @return input string with special characters replaced by _%HEX%_ where HEX is the 2 digit ascii hex code. 
+ */ + public static String sanitizeFuncotationForVcf(final String individualFuncotation) { + Utils.nonNull(individualFuncotation); + return StringUtils.replaceEach(individualFuncotation, new String[]{",", ";", "=", "\t", "|"}, new String[]{"_%2C_", "_%3B_", "_%3D_", "_%09_", "_%7C_"}); + } + + /** + * Create a mapping for a single variant. The mapping are the variant allele(s) to a FuncotationMap {@link FuncotationMap} + * This is lossy, since the attribute cannot possibly store the type of funcotation. + * + * @param funcotationHeaderKeys {@link FuncotatorUtils#extractFuncotatorKeysFromHeaderDescription(String)} for + * getting this parameter from a VCFHeader entry. Never {@code null} + * @param v the variant to use in creating the map. Never {@code null} + * @param transcriptIdFuncotationName The field name to use for determining the transcript ID. Use {@link FuncotationMap#NO_TRANSCRIPT_AVAILABLE_KEY} if unknown. + * If not in the funcotation keys, then the funcotation maps will be created with one transcript ID, + * {@link FuncotationMap#NO_TRANSCRIPT_AVAILABLE_KEY}. 
Never {@code null} + * @return Never {@code null} + */ + public static Map createAlleleToFuncotationMapFromFuncotationVcfAttribute(final String[] funcotationHeaderKeys, final VariantContext v, final String transcriptIdFuncotationName) { + Utils.nonNull(funcotationHeaderKeys); + Utils.nonNull(v); + Utils.nonNull(transcriptIdFuncotationName); + final String rawFuncotationAttribute = v.getAttributeAsString(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME, ""); + final List funcotationPerAllele = Arrays.asList(StringUtils.split(rawFuncotationAttribute, ",")); + if (v.getAlternateAlleles().size() != funcotationPerAllele.size()) { + throw new GATKException.ShouldNeverReachHereException("Could not parse FUNCOTATION field properly."); + } + + return IntStream.range(0, v.getAlternateAlleles().size()).boxed() + .collect(Collectors + .toMap(i -> v.getAlternateAllele(i), i -> FuncotationMap.createAsAllTableFuncotationsFromVcf(transcriptIdFuncotationName, + funcotationHeaderKeys, funcotationPerAllele.get(i), v.getAlternateAllele(i), "TEST"))); + } } + diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java index d57835fa7aa..1b40adf6fef 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/OutputRenderer.java @@ -19,7 +19,7 @@ public abstract class OutputRenderer implements AutoCloseable { //================================================================================================================== /** * {@link LinkedHashMap} of manually specified annotations to add to each output in addition to annotations provided - * to {@link OutputRenderer#write(VariantContext, List)}. + * to {@link OutputRenderer#write(VariantContext, FuncotationMap)}. 
*/ protected LinkedHashMap manualAnnotations; @@ -51,9 +51,9 @@ public String getDataSourceInfoString() { public abstract void close(); /** - * Write the given {@code variant} and {@code funcotations} to the output file. + * Write the given {@code variant} and {@code txToFuncotationMap} to the output file. * @param variant {@link VariantContext} to write to the file. - * @param funcotations {@link List} of {@link Funcotation} to add to the given {@code variant} on output. + * @param txToFuncotationMap {@link FuncotationMap} to add to the given {@code variant} on output. */ - public abstract void write(final VariantContext variant, final List funcotations); + public abstract void write(final VariantContext variant, final FuncotationMap txToFuncotationMap); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/TranscriptSelectionMode.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/TranscriptSelectionMode.java index 3335f5a2956..1897b503785 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/TranscriptSelectionMode.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/TranscriptSelectionMode.java @@ -162,6 +162,15 @@ public Comparator getComparator(final Set userReques public Comparator getComparator(final Set userRequestedTranscripts) { return new CannonicalGencodeFuncotationComparator(userRequestedTranscripts); } + }, + + /** + * Same as CANONICAL, but indicates that no transcripts should be dropped. Render all overlapping transcripts. 
+ */ + ALL { + public Comparator getComparator(final Set userRequestedTranscripts) { + return new CannonicalGencodeFuncotationComparator(userRequestedTranscripts); + } }; public abstract Comparator getComparator(final Set userRequestedTranscripts); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index e37ffe8f3d6..16350850e7f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -232,7 +232,8 @@ public static boolean isValidDirectory(final Path p) { public static List createDataSourceFuncotationFactoriesForDataSources(final Map dataSourceMetaData, final LinkedHashMap annotationOverridesMap, final TranscriptSelectionMode transcriptSelectionMode, - final Set userTranscriptIdSet) { + final Set userTranscriptIdSet, + boolean isAllowingNoChrMatchesForTranscripts) { Utils.nonNull(dataSourceMetaData); Utils.nonNull(annotationOverridesMap); @@ -265,7 +266,7 @@ public static List createDataSourceFuncotationFact funcotationFactory = DataSourceUtils.createCosmicDataSource(path, properties, annotationOverridesMap); break; case GENCODE: - funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet); + funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet, isAllowingNoChrMatchesForTranscripts); break; case VCF: funcotationFactory = DataSourceUtils.createVcfDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet); @@ -375,13 +376,14 @@ public static CosmicFuncotationFactory createCosmicDataSource(final Path dataSou * @param annotationOverridesMap {@link 
LinkedHashMap}{@code String>} containing any annotation overrides to be included in the resulting data source. Must not be {@code null}. * @param transcriptSelectionMode {@link TranscriptSelectionMode} to use when choosing the transcript for detailed reporting. Must not be {@code null}. * @param userTranscriptIdSet {@link Set} of {@link String}s containing transcript IDs of interest to be selected for first. Must not be {@code null}. + * @param isAllowingNoChrMatchesForTranscripts Whether the datasource should disregard chr for a contig match. * @return A new {@link GencodeFuncotationFactory} based on the given data source file information, field overrides map, and transcript information. */ public static GencodeFuncotationFactory createGencodeDataSource(final Path dataSourceFile, final Properties dataSourceProperties, final LinkedHashMap annotationOverridesMap, final TranscriptSelectionMode transcriptSelectionMode, - final Set userTranscriptIdSet) { + final Set userTranscriptIdSet, final boolean isAllowingNoChrMatchesForTranscripts) { Utils.nonNull(dataSourceFile); Utils.nonNull(dataSourceProperties); @@ -400,7 +402,8 @@ public static GencodeFuncotationFactory createGencodeDataSource(final Path dataS name, transcriptSelectionMode, userTranscriptIdSet, - annotationOverridesMap + annotationOverridesMap, + isAllowingNoChrMatchesForTranscripts ); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java index 121f681fb58..5024033e4a1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/TableFuncotation.java @@ -4,6 +4,7 @@ import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import 
org.broadinstitute.hellbender.tools.funcotator.Funcotation; +import org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.LocatableXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; @@ -128,7 +129,7 @@ public void setFieldSerializationOverrideValue(final String fieldName, final Str @Override public String serializeToVcfString() { return fieldMap.values().stream() - .map(f -> (f == null ? "" : f)) + .map(f -> (f == null ? "" : FuncotatorUtils.sanitizeFuncotationForVcf(f))) .collect(Collectors.joining(VcfOutputRenderer.FIELD_DELIMITER)); } @@ -147,6 +148,11 @@ public String getField(final String fieldName) { } } + @Override + public boolean hasField(final String fieldName) { + return fieldMap.containsKey(fieldName); + } + @Override public boolean equals(final Object o) { if ( this == o ) return true; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java index 48d34a324b3..810c2cf98d3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotation.java @@ -4,6 +4,7 @@ import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.funcotator.Funcotation; +import org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.codecs.gencode.GencodeGtfFeature; import 
org.broadinstitute.hellbender.utils.codecs.gencode.GencodeGtfGeneFeature; @@ -14,8 +15,10 @@ import java.util.stream.Collectors; /** - * A class to represent a Functional Annotation from the Gencode data source. + * A class to represent a Functional Annotation. Each instance represents the annotations on a single transcript. * Created by jonn on 8/22/17. + * + * TODO: This will likely need to be renamed TranscriptFuncotation (or will have to implement an interface for TranscriptFuncotation) in order to handle non-gencode transcript sources. */ public class GencodeFuncotation implements Funcotation { @@ -173,31 +176,35 @@ public Allele getAltAllele() { public String serializeToVcfString() { // Alias for the FIELD_DELIMITER so we can have nicer looking code: final String DELIMITER = VcfOutputRenderer.FIELD_DELIMITER; - + //TODO See issue https://github.com/broadinstitute/gatk/issues/4797 + // TODO: Sanitize for VCF. Not quite as easy as it looks. // After the manual string, we check to see if we have an override first and if not we get the set field value: - return (hugoSymbolSerializedOverride != null ? hugoSymbolSerializedOverride : (hugoSymbol != null ? hugoSymbol : "")) + DELIMITER + - (ncbiBuildSerializedOverride != null ? ncbiBuildSerializedOverride : (ncbiBuild != null ? ncbiBuild : "")) + DELIMITER + - (chromosomeSerializedOverride != null ? chromosomeSerializedOverride : (chromosome != null ? chromosome : "")) + DELIMITER + - (startSerializedOverride != null ? startSerializedOverride : start) + DELIMITER + - (endSerializedOverride != null ? endSerializedOverride : end) + DELIMITER + - (variantClassificationSerializedOverride != null ? variantClassificationSerializedOverride : (variantClassification != null ? variantClassification : "")) + DELIMITER + - (secondaryVariantClassificationSerializedOverride != null ? secondaryVariantClassificationSerializedOverride : (secondaryVariantClassification != null ? 
secondaryVariantClassification : "")) + DELIMITER + - (variantTypeSerializedOverride != null ? variantTypeSerializedOverride : (variantType != null ? variantType : "")) + DELIMITER + - (refAlleleSerializedOverride != null ? refAlleleSerializedOverride : (refAllele != null ? refAllele : "")) + DELIMITER + + final List funcotations = Arrays.asList((hugoSymbolSerializedOverride != null ? hugoSymbolSerializedOverride : (hugoSymbol != null ? hugoSymbol : "")), + (ncbiBuildSerializedOverride != null ? ncbiBuildSerializedOverride : (ncbiBuild != null ? ncbiBuild : "")), + (chromosomeSerializedOverride != null ? chromosomeSerializedOverride : (chromosome != null ? chromosome : "")), + (startSerializedOverride != null ? startSerializedOverride : String.valueOf(start)), + (endSerializedOverride != null ? endSerializedOverride : String.valueOf(end)), + (variantClassificationSerializedOverride != null ? variantClassificationSerializedOverride : (variantClassification != null ? variantClassification.toString() : "")), + (secondaryVariantClassificationSerializedOverride != null ? secondaryVariantClassificationSerializedOverride : (secondaryVariantClassification != null ? secondaryVariantClassification.toString() : "")), + (variantTypeSerializedOverride != null ? variantTypeSerializedOverride : (variantType != null ? variantType.toString() : "")), + (refAlleleSerializedOverride != null ? refAlleleSerializedOverride : (refAllele != null ? refAllele : "")), // NOTE: Ref allele gets serialized as the tumorSeqAllele1 as well, but we have to account for the override: - (tumorSeqAllele1SerializedOverride != null ? tumorSeqAllele1SerializedOverride : (refAllele != null ? refAllele : "")) + DELIMITER + - (tumorSeqAllele2SerializedOverride != null ? tumorSeqAllele2SerializedOverride : (tumorSeqAllele2 != null ? tumorSeqAllele2 : "")) + DELIMITER + - (genomeChangeSerializedOverride != null ? genomeChangeSerializedOverride : (genomeChange != null ? 
genomeChange : "")) + DELIMITER + - (annotationTranscriptSerializedOverride != null ? annotationTranscriptSerializedOverride : (annotationTranscript != null ? annotationTranscript : "")) + DELIMITER + - (transcriptStrandSerializedOverride != null ? transcriptStrandSerializedOverride : (transcriptStrand != null ? transcriptStrand : "")) + DELIMITER + - (transcriptExonSerializedOverride != null ? transcriptExonSerializedOverride : (transcriptExon != null ? transcriptExon : "")) + DELIMITER + - (transcriptPosSerializedOverride != null ? transcriptPosSerializedOverride : (transcriptPos != null ? transcriptPos : "")) + DELIMITER + - (cDnaChangeSerializedOverride != null ? cDnaChangeSerializedOverride : (cDnaChange != null ? cDnaChange : "")) + DELIMITER + - (codonChangeSerializedOverride != null ? codonChangeSerializedOverride : (codonChange != null ? codonChange : "")) + DELIMITER + - (proteinChangeSerializedOverride != null ? proteinChangeSerializedOverride : (proteinChange != null ? proteinChange : "")) + DELIMITER + - (gcContentSerializedOverride != null ? gcContentSerializedOverride : (gcContent != null ? gcContent : "")) + DELIMITER + - (referenceContextSerializedOverride != null ? referenceContextSerializedOverride : (referenceContext != null ? referenceContext : "")) + DELIMITER + - (otherTranscriptsSerializedOverride != null ? otherTranscriptsSerializedOverride : (otherTranscripts != null ? otherTranscripts.stream().map(Object::toString).collect(Collectors.joining(VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER)) : "")); + (tumorSeqAllele1SerializedOverride != null ? tumorSeqAllele1SerializedOverride : (refAllele != null ? refAllele : "")), + (tumorSeqAllele2SerializedOverride != null ? tumorSeqAllele2SerializedOverride : (tumorSeqAllele2 != null ? tumorSeqAllele2 : "")), + (genomeChangeSerializedOverride != null ? genomeChangeSerializedOverride : (genomeChange != null ? genomeChange : "")), + (annotationTranscriptSerializedOverride != null ? 
annotationTranscriptSerializedOverride : (annotationTranscript != null ? annotationTranscript : "")), + (transcriptStrandSerializedOverride != null ? transcriptStrandSerializedOverride : (transcriptStrand != null ? transcriptStrand : "")), + (transcriptExonSerializedOverride != null ? transcriptExonSerializedOverride : (transcriptExon != null ? transcriptExon.toString() : "")), + (transcriptPosSerializedOverride != null ? transcriptPosSerializedOverride : (transcriptPos != null ? transcriptPos.toString() : "")), + (cDnaChangeSerializedOverride != null ? cDnaChangeSerializedOverride : (cDnaChange != null ? cDnaChange : "")), + (codonChangeSerializedOverride != null ? codonChangeSerializedOverride : (codonChange != null ? codonChange : "")), + (proteinChangeSerializedOverride != null ? proteinChangeSerializedOverride : (proteinChange != null ? proteinChange : "")), + (gcContentSerializedOverride != null ? gcContentSerializedOverride : (gcContent != null ? gcContent.toString() : "")), + (referenceContextSerializedOverride != null ? referenceContextSerializedOverride : (referenceContext != null ? referenceContext : "")), + (otherTranscriptsSerializedOverride != null ? otherTranscriptsSerializedOverride : (otherTranscripts != null ? 
otherTranscripts.stream().map(Object::toString).collect(Collectors.joining(VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER)) : "")) + ); + + return funcotations.stream().map(f -> FuncotatorUtils.sanitizeFuncotationForVcf(f)).collect(Collectors.joining(DELIMITER)); } @Override @@ -328,6 +335,13 @@ public String getField(final String fieldName) { throw new GATKException(this.getClass().getSimpleName() + ": Does not contain field: " + fieldName); } + @Override + public boolean hasField(final String fieldName) { + final LinkedHashSet fieldNames = getFieldNames(); + final String altFieldName = getDataSourceName() + "_" + version + "_" + fieldName; + return ( fieldNames.contains(fieldName) || fieldNames.contains(altFieldName) ); + } + //================================================================================================================== @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java index 7ca19121a52..1e66819ac58 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java @@ -8,6 +8,7 @@ import htsjdk.tribble.annotation.Strand; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.engine.ReferenceContext; @@ -26,6 +27,7 @@ import java.nio.file.Path; import java.util.*; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -147,6 +149,13 @@ public class GencodeFuncotationFactory 
extends DataSourceFuncotationFactory { */ private final TranscriptSelectionMode transcriptSelectionMode; + /** + * Whether this factory will disregard the string "chr" in matching contig names. + * + * Setting this to true is useful in cases of b37 variants (e.g. contig 3) matching to gencode (e.g. contig chr3) + */ + private boolean isAllowingNoChrMatches = false; + /** * {@link List} of Transcript IDs that the user has requested that we annotate. * If this list is empty, will default to keeping ALL transcripts. @@ -179,7 +188,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, final String name, final TranscriptSelectionMode transcriptSelectionMode, final Set userRequestedTranscripts, - final LinkedHashMap annotationOverrides) { + final LinkedHashMap annotationOverrides, boolean isAllowingNoChrMatches) { this.gencodeTranscriptFastaFile = gencodeTranscriptFastaFile; @@ -203,6 +212,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, // Initialize overrides / defaults: initializeAnnotationOverrides( annotationOverrides ); + + this.isAllowingNoChrMatches = isAllowingNoChrMatches; } //================================================================================================================== @@ -264,6 +275,50 @@ protected List createDefaultFuncotationsOnVariant( final VariantCon return funcotationList; } + private List createAndFilterGencodeFuncotationsByTranscript(final VariantContext variant, final ReferenceContext referenceContext, final Allele altAllele, final GencodeGtfGeneFeature feature) { + // By this point we know the feature type is correct, so we cast it: + final List gencodeFuncotationList = createFuncotationsHelper(variant, altAllele, feature, referenceContext); + sortAndFilterInPlace(gencodeFuncotationList); + + if ((this.transcriptSelectionMode != TranscriptSelectionMode.ALL) && (gencodeFuncotationList.size() > 0)) { + return Collections.singletonList(gencodeFuncotationList.get(0)); + } + + return 
gencodeFuncotationList; + } + + private void sortAndFilterInPlace(final List gencodeFuncotationList) { + if (gencodeFuncotationList.size() > 0) { + // Get our "Best Transcript" from our list. + sortFuncotationsByTranscriptForOutput(gencodeFuncotationList); + + + // Now we have to filter out the output gencodeFuncotations if they are not on the list the user provided: + // TODO: Is this correct behavior? The sorting takes care of ordering the transcripts. + filterAnnotationsByUserTranscripts(gencodeFuncotationList, userRequestedTranscripts); + + // Since the initial query was done on the entire gene footprint, we need to get rid of every transcript that does not overlap the variant at all (not even in flank) + // i.e. IGR. + filterAnnotationsByIGR(gencodeFuncotationList); + + populateOtherTranscriptsMapForFuncotation(gencodeFuncotationList); + } + } + + private void populateOtherTranscriptsMapForFuncotation(final List gencodeFuncotations) { + // First create a map that goes from each given funcotation to its condensed version. 
+ final Map funcotationToCondensedString = gencodeFuncotations.stream() + .collect(Collectors.toMap(Function.identity(), f -> condenseGencodeFuncotation(f), (x,y) -> y, LinkedHashMap::new)); + + for (final GencodeFuncotation g: gencodeFuncotations) { + final List otherTranscriptStrings = funcotationToCondensedString.keySet().stream() + .filter(f -> !f.equals(g)) + .map(f -> funcotationToCondensedString.get(f)) + .collect(Collectors.toList()); + g.setOtherTranscripts(otherTranscriptStrings); + } + } + @Override /** * Attempts to treat the given features as {@link GencodeGtfFeature} objects in order to @@ -279,15 +334,11 @@ protected List createFuncotationsOnVariant(final VariantContext var if ( (feature != null) ) { // By this point we know the feature type is correct, so we cast it: - final List gencodeFuncotationList = createFuncotationsHelper(variant, altAllele, (GencodeGtfGeneFeature) feature, referenceContext); - - // Now we have to filter out the output gencodeFuncotations if they are not on the list the user provided: - filterAnnotationsByUserTranscripts( gencodeFuncotationList, userRequestedTranscripts ); + final List gencodeFuncotationList = createAndFilterGencodeFuncotationsByTranscript(variant, referenceContext, altAllele, (GencodeGtfGeneFeature) feature); // Add the filtered funcotations here: outputFuncotations.addAll(gencodeFuncotationList); } - // TODO: Actually you may want to put another IGR creation here for now... This may be a more difficult thing if we determine it in here. There is no way to know if these are IGRs or simply not included in this particular data set. } } } @@ -330,6 +381,14 @@ static void filterAnnotationsByUserTranscripts( final List f } } + /** + * Filter the given list of {@link GencodeFuncotation} to only contain those funcotations that are NOT IGR. + * @param funcotations The {@link List} of {@link GencodeFuncotation} to filter. 
+ */ + static void filterAnnotationsByIGR(final List funcotations) { + funcotations.removeIf( f -> f.getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)); + } + /** * Creates a map of Transcript IDs for use in looking up transcripts from the FASTA dictionary for the GENCODE Transcripts. * We include the start and stop codons in the transcripts so we can handle start/stop codon variants. @@ -518,58 +577,43 @@ List createFuncotationsHelper(final VariantContext variant, // Go through and annotate all our non-best transcripts: final List otherTranscriptsCondensedAnnotations = new ArrayList<>(); - for ( final GencodeGtfTranscriptFeature transcript : gtfFeature.getTranscripts() ) { - // Check if this transcript has the `basic` tag: - final boolean isBasic = transcript.getOptionalFields().stream() - .filter( f -> f.getName().equals("tag") ) - .filter( f -> f.getValue() instanceof GencodeGtfFeature.FeatureTag ) - .filter( f -> f.getValue().equals(GencodeGtfFeature.FeatureTag.BASIC) ) - .count() > 0; + final List basicTranscripts = gtfFeature.getTranscripts().stream() + .filter(GencodeFuncotationFactory::isBasic).collect(Collectors.toList()); - // Only annotate on the `basic` transcripts: - if ( isBasic ) { + // Only annotate on the `basic` transcripts: + for ( final GencodeGtfTranscriptFeature transcript : basicTranscripts ) { - // Try to create the annotation: - try { - final GencodeFuncotation gencodeFuncotation = createGencodeFuncotationOnTranscript(variant, altAllele, gtfFeature, reference, transcript); + // Try to create the annotation: + try { + final GencodeFuncotation gencodeFuncotation = createGencodeFuncotationOnTranscript(variant, altAllele, gtfFeature, reference, transcript); - // Add it into our transcript: - outputFuncotations.add(gencodeFuncotation); - - } - catch ( final FuncotatorUtils.TranscriptCodingSequenceException ex ) { - //TODO: This should never happen, but needs to be here for some transcripts, such as HG19 MUC16 
ENST00000599436.1, where the transcript sequence itself is of length not divisible by 3! (3992) - // There may be other erroneous transcripts too. - otherTranscriptsCondensedAnnotations.add("ERROR_ON_" + transcript.getTranscriptId()); - - logger.warn("Unable to create a GencodeFuncotation on transcript " + transcript.getTranscriptId() + " for variant: " + - variant.getContig() + ":" + variant.getStart() + "-" + variant.getEnd() + "(" + variant.getReference() + " -> " + altAllele + ")" - ); - } + // Add it into our transcript: + outputFuncotations.add(gencodeFuncotation); } - } - - if (outputFuncotations.size() > 0) { - // Get our "Best Transcript" from our list: - sortFuncotationsByTranscriptForOutput(outputFuncotations); - final GencodeFuncotation bestFuncotation = outputFuncotations.remove(0); + catch ( final FuncotatorUtils.TranscriptCodingSequenceException ex ) { + //TODO: This should never happen, but needs to be here for some transcripts, such as HG19 MUC16 ENST00000599436.1, where the transcript sequence itself is of length not divisible by 3! (3992) + // There may be other erroneous transcripts too. 
+ otherTranscriptsCondensedAnnotations.add("ERROR_ON_" + transcript.getTranscriptId()); - // Now convert the other transcripts into summary strings: - for ( final GencodeFuncotation funcotation : outputFuncotations ) { - otherTranscriptsCondensedAnnotations.add(condenseGencodeFuncotation(funcotation)); + logger.warn("Unable to create a GencodeFuncotation on transcript " + transcript.getTranscriptId() + " for variant: " + + variant.getContig() + ":" + variant.getStart() + "-" + variant.getEnd() + "(" + variant.getReference() + " -> " + altAllele + ")" + ); } - - // Set our `other transcripts` annotation in our best funcotation: - bestFuncotation.setOtherTranscripts(otherTranscriptsCondensedAnnotations); - - // Add our best funcotation to the output: - outputFuncotations.add(bestFuncotation); } return outputFuncotations; } + private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) { + // Check if this transcript has the `basic` tag: + return transcript.getOptionalFields().stream() + .filter( f -> f.getName().equals("tag") ) + .filter( f -> f.getValue() instanceof GencodeGtfFeature.FeatureTag ) + .filter( f -> f.getValue().equals(GencodeGtfFeature.FeatureTag.BASIC) ) + .count() > 0; + } + /** * Create a {@link GencodeFuncotation} for a given variant and transcript. * @param variant The {@link VariantContext} to annotate. @@ -588,17 +632,27 @@ private GencodeFuncotation createGencodeFuncotationOnTranscript(final VariantCon // TODO: check for complex INDEL and warn and skip. 
+ VariantContext variantToUse = variant; + // Find the sub-feature of transcript that contains our variant: - final GencodeGtfFeature containingSubfeature = getContainingGtfSubfeature(variant, transcript); + GencodeGtfFeature containingSubfeature = getContainingGtfSubfeature(variantToUse, transcript); + + // If we got no hits, let's check as if this contig was translated to hg19 + if (isAllowingNoChrMatches && containingSubfeature == null) { + final VariantContextBuilder vcb = new VariantContextBuilder(variant); + vcb.chr(FuncotatorUtils.convertB37ContigToHg19Contig(variant.getContig())); + variantToUse = vcb.make(); + containingSubfeature = getContainingGtfSubfeature(variantToUse, transcript); + } // Make sure the sub-regions in the transcript actually contain the variant: // TODO: this is slow, and repeats work that is done later in the process (we call getSortedCdsAndStartStopPositions when creating the sequence comparison) - final int startPosInTranscript = FuncotatorUtils.getStartPositionInTranscript(variant, getSortedCdsAndStartStopPositions(transcript), transcript.getGenomicStrand() ); + final int startPosInTranscript = FuncotatorUtils.getStartPositionInTranscript(variantToUse, getSortedCdsAndStartStopPositions(transcript), transcript.getGenomicStrand() ); // Determine what kind of region we're in and handle it in it's own way: if ( containingSubfeature == null ) { // We have an IGR variant - gencodeFuncotation = createIgrFuncotation(variant, altAllele, reference); + gencodeFuncotation = createIgrFuncotation(variantToUse, altAllele, reference); } else if ( GencodeGtfExonFeature.class.isAssignableFrom(containingSubfeature.getClass()) ) { @@ -610,16 +664,16 @@ else if ( GencodeGtfExonFeature.class.isAssignableFrom(containingSubfeature.getC } else { // We have a coding region variant - gencodeFuncotation = createExonFuncotation(variant, altAllele, gtfFeature, reference, transcript, (GencodeGtfExonFeature) containingSubfeature); + gencodeFuncotation = 
createExonFuncotation(variantToUse, altAllele, gtfFeature, reference, transcript, (GencodeGtfExonFeature) containingSubfeature); } } else if ( GencodeGtfUTRFeature.class.isAssignableFrom(containingSubfeature.getClass()) ) { // We have a UTR variant - gencodeFuncotation = createUtrFuncotation(variant, altAllele, reference, gtfFeature, transcript, (GencodeGtfUTRFeature) containingSubfeature); + gencodeFuncotation = createUtrFuncotation(variantToUse, altAllele, reference, gtfFeature, transcript, (GencodeGtfUTRFeature) containingSubfeature); } else if ( GencodeGtfTranscriptFeature.class.isAssignableFrom(containingSubfeature.getClass()) ) { // We have an intron variant - gencodeFuncotation = createIntronFuncotation(variant, altAllele, reference, gtfFeature, transcript, reference); + gencodeFuncotation = createIntronFuncotation(variantToUse, altAllele, reference, gtfFeature, transcript, reference); } else { // Uh-oh! Problemz. @@ -1286,18 +1340,18 @@ private static GencodeGtfExonFeature getExonWithinSpliceSiteWindow( final Varian } /** - * Get the subfeature contained in {@code transcript} that contains the given {@code variant}. + * Get the subfeature contained in {@code transcript} that contains the given {@code Locatable}. * The returned subfeature will be of type {@link GencodeGtfFeature} with concrete type based on the type of region * in which the variant is found: * Found in coding region -> {@link GencodeGtfExonFeature} * Found in UTR ->{@link GencodeGtfUTRFeature} * Found in intron ->{@link GencodeGtfTranscriptFeature} * Not Found in transcript ->{@code null} - * @param variant A {@link VariantContext} of which to determine the containing subfeature. + * @param variant A {@link Locatable} of which to determine the containing subfeature. * @param transcript A {@link GencodeGtfTranscriptFeature} in which to find the subfeature containing the given {@code variant}. 
* @return The {@link GencodeGtfFeature} corresponding to the subfeature of {@code transcript} in which the given {@code variant} was found. */ - private static GencodeGtfFeature getContainingGtfSubfeature(final VariantContext variant, final GencodeGtfTranscriptFeature transcript) { + private static GencodeGtfFeature getContainingGtfSubfeature(final Locatable variant, final GencodeGtfTranscriptFeature transcript) { boolean determinedRegionAlready = false; GencodeGtfFeature subFeature = null; @@ -1804,7 +1858,8 @@ private GencodeFuncotation createIgrFuncotation(final VariantContext variant, .setEnd(variant.getEnd()) .setVariantType(getVariantType(variant.getReference(), altAllele)) // .setNcbiBuild(variant.getUcscGenomeVersion()) - .setChromosome(variant.getContig()); + .setChromosome(variant.getContig()) + .setAnnotationTranscript(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY); // If we have a cached value for the ncbiBuildVersion, we should add it: // NOTE: This will only be true if we have previously annotated a non-IGR variant. 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java index 7462ceef3c3..e4a3020ae35 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java @@ -151,7 +151,9 @@ protected List createFuncotationsOnVariant(final VariantContext var // Now we create one funcotation for each Alternate allele: for ( final Allele altAllele : alternateAlleles ) { - + if (!(variantFeature.hasAlternateAllele(altAllele) && variantFeature.getReference().equals(variant.getReference()))) { + continue; + } // Add all Info keys/values to a copy of our default map: final LinkedHashMap annotations = new LinkedHashMap<>(supportedFieldNamesAndDefaults); for ( final Map.Entry entry : variantFeature.getAttributes().entrySet() ) { @@ -159,12 +161,10 @@ protected List createFuncotationsOnVariant(final VariantContext var final String valueString; // Handle collections a little differently: - if ( entry.getValue() instanceof Collection ) { - @SuppressWarnings("unchecked") - final Collection objectList = ((Collection)entry.getValue()); + if (entry.getValue() instanceof Collection) { + @SuppressWarnings("unchecked") final Collection objectList = ((Collection) entry.getValue()); valueString = objectList.stream().map(Object::toString).collect(Collectors.joining(",")); - } - else { + } else { valueString = entry.getValue().toString(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java index c931335569d..b392f48dcf8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRenderer.java @@ -6,11 +6,10 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import org.apache.commons.lang.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.tools.funcotator.DataSourceFuncotationFactory; -import org.broadinstitute.hellbender.tools.funcotator.Funcotation; -import org.broadinstitute.hellbender.tools.funcotator.Funcotator; -import org.broadinstitute.hellbender.tools.funcotator.OutputRenderer; +import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.Utils; @@ -42,6 +41,8 @@ public class MafOutputRenderer extends OutputRenderer { //================================================================================================================== // Private Static Members: + static final Logger logger = LogManager.getLogger(MafOutputRenderer.class); + /** * Default set of columns to include in this {@link MafOutputRenderer}. * Order of the columns is preserved by the {@link LinkedHashMap}, while still being able to access each field via @@ -207,57 +208,66 @@ public void close() { } @Override - public void write(final VariantContext variant, final List funcotations) { + public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) { + + if (txToFuncotationMap.keyList().size() > 1) { + logger.warn("MAF typically does not support multiple transcripts per variant, though this should be able to render (grouped by transcript). 
No user action needed."); + } // Loop through each alt allele in our variant: for ( final Allele altAllele : variant.getAlternateAlleles() ) { + for (final String txId : txToFuncotationMap.keyList()) { + + final List funcotations = txToFuncotationMap.get(txId); - // Create our output maps: - final LinkedHashMap outputMap = new LinkedHashMap<>(defaultMap); - final LinkedHashMap extraFieldOutputMap = new LinkedHashMap<>(); + // Create our output maps: + final LinkedHashMap outputMap = new LinkedHashMap<>(defaultMap); + final LinkedHashMap extraFieldOutputMap = new LinkedHashMap<>(); - // Get our funcotations for this allele and add them to the output maps: - for ( final Funcotation funcotation : funcotations ) { - if ( funcotation.getAltAllele().equals(altAllele) ) { - // Add all the fields from the other funcotations into the extra field output: - for ( final String field : funcotation.getFieldNames() ) { - setField(extraFieldOutputMap, field, funcotation.getField(field)); + // Get our funcotations for this allele and add them to the output maps: + for (final Funcotation funcotation : funcotations) { + if (funcotation.getAltAllele().equals(altAllele)) { + // Add all the fields from the other funcotations into the extra field output: + for (final String field : funcotation.getFieldNames()) { + setField(extraFieldOutputMap, field, funcotation.getField(field)); + } } } - } - // Now add in our annotation overrides so they can be aliased correctly with the outputFieldNameMap: - extraFieldOutputMap.putAll(overrideAnnotations); - - // Go through all output fields and see if any of the names in the value list are in our extraFieldOutputMap. - // For any that match, we remove them from our extraFieldOutputMap and add them to the outputMap with the - // correct key. 
- for ( final Map.Entry> entry : outputFieldNameMap.entrySet() ) { - for ( final String fieldName : entry.getValue() ) { - if ( extraFieldOutputMap.containsKey(fieldName) ) { - outputMap.put(entry.getKey(), extraFieldOutputMap.remove(fieldName)); - break; + + // Now add in our annotation overrides so they can be aliased correctly with the outputFieldNameMap: + extraFieldOutputMap.putAll(overrideAnnotations); + + // Go through all output fields and see if any of the names in the value list are in our extraFieldOutputMap. + // For any that match, we remove them from our extraFieldOutputMap and add them to the outputMap with the + // correct key. + for (final Map.Entry> entry : outputFieldNameMap.entrySet()) { + for (final String fieldName : entry.getValue()) { + if (extraFieldOutputMap.containsKey(fieldName)) { + outputMap.put(entry.getKey(), extraFieldOutputMap.remove(fieldName)); + break; + } } } - } - // Merge our output maps together: - outputMap.putAll(extraFieldOutputMap); + // Merge our output maps together: + outputMap.putAll(extraFieldOutputMap); - // Now translate fields to the field names that MAF likes: - final LinkedHashMap mafCompliantOutputMap = replaceFuncotationValuesWithMafCompliantValues(outputMap); + // Now translate fields to the field names that MAF likes: + final LinkedHashMap mafCompliantOutputMap = replaceFuncotationValuesWithMafCompliantValues(outputMap); - // Write our header if we have to: - if ( ! 
hasWrittenHeader ) { - writeHeader(mafCompliantOutputMap); - } + // Write our header if we have to: + if (!hasWrittenHeader) { + writeHeader(mafCompliantOutputMap); + } - // Write the output (with manual annotations at the end): - for ( final Map.Entry entry : mafCompliantOutputMap.entrySet() ) { - writeString(entry.getValue()); - writeString(MafOutputRendererConstants.FIELD_DELIMITER); + // Write the output (with manual annotations at the end): + for (final Map.Entry entry : mafCompliantOutputMap.entrySet()) { + writeString(entry.getValue()); + writeString(MafOutputRendererConstants.FIELD_DELIMITER); + } + writeLine(manualAnnotationSerializedString); } - writeLine(manualAnnotationSerializedString); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererConstants.java index 25c607ed03a..fe8d9601fe0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererConstants.java @@ -8,7 +8,7 @@ * Class to hold all the constants required for the {@link MafOutputRenderer}. * Designed to be a simple container class with no methods. 
*/ -class MafOutputRendererConstants { +public class MafOutputRendererConstants { //================================================================================================================== // Static initializers: @@ -62,86 +62,86 @@ class MafOutputRendererConstants { // Specific Field Values: // Field Names: - static final String FieldName_Hugo_Symbol = "Hugo_Symbol"; - static final String FieldName_Entrez_Gene_Id = "Entrez_Gene_Id"; - static final String FieldName_Center = "Center"; - static final String FieldName_NCBI_Build = "NCBI_Build"; - static final String FieldName_Chromosome = "Chromosome"; - static final String FieldName_Start_Position = "Start_Position"; - static final String FieldName_End_Position = "End_Position"; - static final String FieldName_Strand = "Strand"; - static final String FieldName_Variant_Classification = "Variant_Classification"; - static final String FieldName_Variant_Type = "Variant_Type"; - static final String FieldName_Reference_Allele = "Reference_Allele"; - static final String FieldName_Tumor_Seq_Allele1 = "Tumor_Seq_Allele1"; - static final String FieldName_Tumor_Seq_Allele2 = "Tumor_Seq_Allele2"; - static final String FieldName_dbSNP_RS = "dbSNP_RS"; - static final String FieldName_dbSNP_Val_Status = "dbSNP_Val_Status"; - static final String FieldName_Tumor_Sample_Barcode = "Tumor_Sample_Barcode"; - static final String FieldName_Matched_Norm_Sample_Barcode = "Matched_Norm_Sample_Barcode"; - static final String FieldName_Match_Norm_Seq_Allele1 = "Match_Norm_Seq_Allele1"; - static final String FieldName_Match_Norm_Seq_Allele2 = "Match_Norm_Seq_Allele2"; - static final String FieldName_Tumor_Validation_Allele1 = "Tumor_Validation_Allele1"; - static final String FieldName_Tumor_Validation_Allele2 = "Tumor_Validation_Allele2"; - static final String FieldName_Match_Norm_Validation_Allele1 = "Match_Norm_Validation_Allele1"; - static final String FieldName_Match_Norm_Validation_Allele2 = "Match_Norm_Validation_Allele2"; - 
static final String FieldName_Verification_Status = "Verification_Status"; - static final String FieldName_Validation_Status = "Validation_Status"; - static final String FieldName_Mutation_Status = "Mutation_Status"; - static final String FieldName_Sequencing_Phase = "Sequencing_Phase"; - static final String FieldName_Sequence_Source = "Sequence_Source"; - static final String FieldName_Validation_Method = "Validation_Method"; - static final String FieldName_Score = "Score"; - static final String FieldName_BAM_File = "BAM_File"; - static final String FieldName_Sequencer = "Sequencer"; - static final String FieldName_Tumor_Sample_UUID = "Tumor_Sample_UUID"; - static final String FieldName_Matched_Norm_Sample_UUID = "Matched_Norm_Sample_UUID"; - static final String FieldName_Genome_Change = "Genome_Change"; - static final String FieldName_Annotation_Transcript = "Annotation_Transcript"; - static final String FieldName_Transcript_Strand = "Transcript_Strand"; - static final String FieldName_Transcript_Exon = "Transcript_Exon"; - static final String FieldName_Transcript_Position = "Transcript_Position"; - static final String FieldName_cDNA_Change = "cDNA_Change"; - static final String FieldName_Codon_Change = "Codon_Change"; - static final String FieldName_Protein_Change = "Protein_Change"; - static final String FieldName_Other_Transcripts = "Other_Transcripts"; - static final String FieldName_Refseq_mRNA_Id = "Refseq_mRNA_Id"; - static final String FieldName_Refseq_prot_Id = "Refseq_prot_Id"; - static final String FieldName_SwissProt_acc_Id = "SwissProt_acc_Id"; - static final String FieldName_SwissProt_entry_Id = "SwissProt_entry_Id"; - static final String FieldName_Description = "Description"; - static final String FieldName_UniProt_AApos = "UniProt_AApos"; - static final String FieldName_UniProt_Region = "UniProt_Region"; - static final String FieldName_UniProt_Site = "UniProt_Site"; - static final String FieldName_UniProt_Natural_Variations = 
"UniProt_Natural_Variations"; - static final String FieldName_UniProt_Experimental_Info = "UniProt_Experimental_Info"; - static final String FieldName_GO_Biological_Process = "GO_Biological_Process"; - static final String FieldName_GO_Cellular_Component = "GO_Cellular_Component"; - static final String FieldName_GO_Molecular_Function = "GO_Molecular_Function"; - static final String FieldName_COSMIC_overlapping_mutations = "COSMIC_overlapping_mutations"; - static final String FieldName_COSMIC_fusion_genes = "COSMIC_fusion_genes"; - static final String FieldName_COSMIC_tissue_types_affected = "COSMIC_tissue_types_affected"; - static final String FieldName_COSMIC_total_alterations_in_gene = "COSMIC_total_alterations_in_gene"; - static final String FieldName_Tumorscape_Amplification_Peaks = "Tumorscape_Amplification_Peaks"; - static final String FieldName_Tumorscape_Deletion_Peaks = "Tumorscape_Deletion_Peaks"; - static final String FieldName_TCGAscape_Amplification_Peaks = "TCGAscape_Amplification_Peaks"; - static final String FieldName_TCGAscape_Deletion_Peaks = "TCGAscape_Deletion_Peaks"; - static final String FieldName_DrugBank = "DrugBank"; - static final String FieldName_ref_context = "ref_context"; - static final String FieldName_gc_content = "gc_content"; - static final String FieldName_CCLE_ONCOMAP_overlapping_mutations = "CCLE_ONCOMAP_overlapping_mutations"; - static final String FieldName_CCLE_ONCOMAP_total_mutations_in_gene = "CCLE_ONCOMAP_total_mutations_in_gene"; - static final String FieldName_CGC_Mutation_Type = "CGC_Mutation_Type"; - static final String FieldName_CGC_Translocation_Partner = "CGC_Translocation_Partner"; - static final String FieldName_CGC_Tumor_Types_Somatic = "CGC_Tumor_Types_Somatic"; - static final String FieldName_CGC_Tumor_Types_Germline = "CGC_Tumor_Types_Germline"; - static final String FieldName_CGC_Other_Diseases = "CGC_Other_Diseases"; - static final String FieldName_DNARepairGenes_Activity_linked_to_OMIM = 
"DNARepairGenes_Activity_linked_to_OMIM"; - static final String FieldName_FamilialCancerDatabase_Syndromes = "FamilialCancerDatabase_Syndromes"; - static final String FieldName_MUTSIG_Published_Results = "MUTSIG_Published_Results"; - static final String FieldName_OREGANNO_ID = "OREGANNO_ID"; - static final String FieldName_OREGANNO_Values = "OREGANNO_Values"; - static final String FieldName_tumor_f = "tumor_f"; + public static final String FieldName_Hugo_Symbol = "Hugo_Symbol"; + public static final String FieldName_Entrez_Gene_Id = "Entrez_Gene_Id"; + public static final String FieldName_Center = "Center"; + public static final String FieldName_NCBI_Build = "NCBI_Build"; + public static final String FieldName_Chromosome = "Chromosome"; + public static final String FieldName_Start_Position = "Start_Position"; + public static final String FieldName_End_Position = "End_Position"; + public static final String FieldName_Strand = "Strand"; + public static final String FieldName_Variant_Classification = "Variant_Classification"; + public static final String FieldName_Variant_Type = "Variant_Type"; + public static final String FieldName_Reference_Allele = "Reference_Allele"; + public static final String FieldName_Tumor_Seq_Allele1 = "Tumor_Seq_Allele1"; + public static final String FieldName_Tumor_Seq_Allele2 = "Tumor_Seq_Allele2"; + public static final String FieldName_dbSNP_RS = "dbSNP_RS"; + public static final String FieldName_dbSNP_Val_Status = "dbSNP_Val_Status"; + public static final String FieldName_Tumor_Sample_Barcode = "Tumor_Sample_Barcode"; + public static final String FieldName_Matched_Norm_Sample_Barcode = "Matched_Norm_Sample_Barcode"; + public static final String FieldName_Match_Norm_Seq_Allele1 = "Match_Norm_Seq_Allele1"; + public static final String FieldName_Match_Norm_Seq_Allele2 = "Match_Norm_Seq_Allele2"; + public static final String FieldName_Tumor_Validation_Allele1 = "Tumor_Validation_Allele1"; + public static final String 
FieldName_Tumor_Validation_Allele2 = "Tumor_Validation_Allele2"; + public static final String FieldName_Match_Norm_Validation_Allele1 = "Match_Norm_Validation_Allele1"; + public static final String FieldName_Match_Norm_Validation_Allele2 = "Match_Norm_Validation_Allele2"; + public static final String FieldName_Verification_Status = "Verification_Status"; + public static final String FieldName_Validation_Status = "Validation_Status"; + public static final String FieldName_Mutation_Status = "Mutation_Status"; + public static final String FieldName_Sequencing_Phase = "Sequencing_Phase"; + public static final String FieldName_Sequence_Source = "Sequence_Source"; + public static final String FieldName_Validation_Method = "Validation_Method"; + public static final String FieldName_Score = "Score"; + public static final String FieldName_BAM_File = "BAM_File"; + public static final String FieldName_Sequencer = "Sequencer"; + public static final String FieldName_Tumor_Sample_UUID = "Tumor_Sample_UUID"; + public static final String FieldName_Matched_Norm_Sample_UUID = "Matched_Norm_Sample_UUID"; + public static final String FieldName_Genome_Change = "Genome_Change"; + public static final String FieldName_Annotation_Transcript = "Annotation_Transcript"; + public static final String FieldName_Transcript_Strand = "Transcript_Strand"; + public static final String FieldName_Transcript_Exon = "Transcript_Exon"; + public static final String FieldName_Transcript_Position = "Transcript_Position"; + public static final String FieldName_cDNA_Change = "cDNA_Change"; + public static final String FieldName_Codon_Change = "Codon_Change"; + public static final String FieldName_Protein_Change = "Protein_Change"; + public static final String FieldName_Other_Transcripts = "Other_Transcripts"; + public static final String FieldName_Refseq_mRNA_Id = "Refseq_mRNA_Id"; + public static final String FieldName_Refseq_prot_Id = "Refseq_prot_Id"; + public static final String FieldName_SwissProt_acc_Id 
= "SwissProt_acc_Id"; + public static final String FieldName_SwissProt_entry_Id = "SwissProt_entry_Id"; + public static final String FieldName_Description = "Description"; + public static final String FieldName_UniProt_AApos = "UniProt_AApos"; + public static final String FieldName_UniProt_Region = "UniProt_Region"; + public static final String FieldName_UniProt_Site = "UniProt_Site"; + public static final String FieldName_UniProt_Natural_Variations = "UniProt_Natural_Variations"; + public static final String FieldName_UniProt_Experimental_Info = "UniProt_Experimental_Info"; + public static final String FieldName_GO_Biological_Process = "GO_Biological_Process"; + public static final String FieldName_GO_Cellular_Component = "GO_Cellular_Component"; + public static final String FieldName_GO_Molecular_Function = "GO_Molecular_Function"; + public static final String FieldName_COSMIC_overlapping_mutations = "COSMIC_overlapping_mutations"; + public static final String FieldName_COSMIC_fusion_genes = "COSMIC_fusion_genes"; + public static final String FieldName_COSMIC_tissue_types_affected = "COSMIC_tissue_types_affected"; + public static final String FieldName_COSMIC_total_alterations_in_gene = "COSMIC_total_alterations_in_gene"; + public static final String FieldName_Tumorscape_Amplification_Peaks = "Tumorscape_Amplification_Peaks"; + public static final String FieldName_Tumorscape_Deletion_Peaks = "Tumorscape_Deletion_Peaks"; + public static final String FieldName_TCGAscape_Amplification_Peaks = "TCGAscape_Amplification_Peaks"; + public static final String FieldName_TCGAscape_Deletion_Peaks = "TCGAscape_Deletion_Peaks"; + public static final String FieldName_DrugBank = "DrugBank"; + public static final String FieldName_ref_context = "ref_context"; + public static final String FieldName_gc_content = "gc_content"; + public static final String FieldName_CCLE_ONCOMAP_overlapping_mutations = "CCLE_ONCOMAP_overlapping_mutations"; + public static final String 
FieldName_CCLE_ONCOMAP_total_mutations_in_gene = "CCLE_ONCOMAP_total_mutations_in_gene"; + public static final String FieldName_CGC_Mutation_Type = "CGC_Mutation_Type"; + public static final String FieldName_CGC_Translocation_Partner = "CGC_Translocation_Partner"; + public static final String FieldName_CGC_Tumor_Types_Somatic = "CGC_Tumor_Types_Somatic"; + public static final String FieldName_CGC_Tumor_Types_Germline = "CGC_Tumor_Types_Germline"; + public static final String FieldName_CGC_Other_Diseases = "CGC_Other_Diseases"; + public static final String FieldName_DNARepairGenes_Activity_linked_to_OMIM = "DNARepairGenes_Activity_linked_to_OMIM"; + public static final String FieldName_FamilialCancerDatabase_Syndromes = "FamilialCancerDatabase_Syndromes"; + public static final String FieldName_MUTSIG_Published_Results = "MUTSIG_Published_Results"; + public static final String FieldName_OREGANNO_ID = "OREGANNO_ID"; + public static final String FieldName_OREGANNO_Values = "OREGANNO_Values"; + public static final String FieldName_tumor_f = "tumor_f"; // Field Values: static final String FieldValue_Strand = "+"; @@ -156,19 +156,19 @@ class MafOutputRendererConstants { static final Map VariantClassificationMap; // Output Field Name Map Defaults: - static final List OutputFieldNameMap_Hugo_Symbol = Arrays.asList(FieldName_Hugo_Symbol, "Gencode_19_hugoSymbol", "Gencode_27_hugoSymbol", "gene", "Gene"); + static final List OutputFieldNameMap_Hugo_Symbol = Arrays.asList(FieldName_Hugo_Symbol, "Gencode_19_hugoSymbol", "Gencode_27_hugoSymbol", "Gencode_28_hugoSymbol", "gene", "Gene"); static final List OutputFieldNameMap_Entrez_Gene_Id = Arrays.asList(FieldName_Entrez_Gene_Id, "HGNC_Entrez_Gene_ID", "HGNC_Entrez Gene ID", "HGNC_Entrez_Gene_ID(supplied_by_NCBI)", "HGNC_Entrez Gene ID(supplied by NCBI)", "entrez_id", "gene_id"); static final List OutputFieldNameMap_Center = Arrays.asList(FieldName_Center, "center"); - static final List OutputFieldNameMap_NCBI_Build = 
Arrays.asList(FieldName_NCBI_Build, "Gencode_19_ncbiBuild", "Gencode_27_ncbiBuild", "ncbi_build"); - static final List OutputFieldNameMap_Chromosome = Arrays.asList(FieldName_Chromosome, "Gencode_19_chromosome", "Gencode_27_chromosome", "chr", "contig", "chromosome", "chrom", "Chrom"); - static final List OutputFieldNameMap_Start_Position = Arrays.asList(FieldName_Start_Position, "Start_position", "Gencode_19_start", "Gencode_27_start", "start", "Start", "start_pos", "pos"); - static final List OutputFieldNameMap_End_Position = Arrays.asList(FieldName_End_Position, "End_position", "Gencode_19_end", "Gencode_27_end", "end", "End", "end_pos"); + static final List OutputFieldNameMap_NCBI_Build = Arrays.asList(FieldName_NCBI_Build, "Gencode_19_ncbiBuild", "Gencode_27_ncbiBuild", "Gencode_28_ncbiBuild", "ncbi_build"); + static final List OutputFieldNameMap_Chromosome = Arrays.asList(FieldName_Chromosome, "Gencode_19_chromosome", "Gencode_27_chromosome", "Gencode_28_chromosome", "chr", "contig", "chromosome", "chrom", "Chrom"); + static final List OutputFieldNameMap_Start_Position = Arrays.asList(FieldName_Start_Position, "Start_position", "Gencode_19_start", "Gencode_27_start", "Gencode_28_start", "start", "Start", "start_pos", "pos"); + static final List OutputFieldNameMap_End_Position = Arrays.asList(FieldName_End_Position, "End_position", "Gencode_19_end", "Gencode_27_end", "Gencode_28_end", "end", "End", "end_pos"); static final List OutputFieldNameMap_Strand = Collections.singletonList(FieldName_Strand); - static final List OutputFieldNameMap_Variant_Classification = Arrays.asList(FieldName_Variant_Classification, "Gencode_19_variantClassification", "Gencode_27_variantClassification", "variant_classification"); - static final List OutputFieldNameMap_Variant_Type = Arrays.asList(FieldName_Variant_Type, "Gencode_19_variantType", "Gencode_27_variantType", "variant_type"); - static final List OutputFieldNameMap_Reference_Allele = 
Arrays.asList(FieldName_Reference_Allele, "Gencode_19_refAllele", "Gencode_27_refAllele", "ref", "ref_allele", "reference_allele"); - static final List OutputFieldNameMap_Tumor_Seq_Allele1 = Arrays.asList(FieldName_Tumor_Seq_Allele1, "Gencode_19_tumorSeqAllele1", "Gencode_27_tumorSeqAllele1", "ref", "ref_allele", "reference_allele"); - static final List OutputFieldNameMap_Tumor_Seq_Allele2 = Arrays.asList(FieldName_Tumor_Seq_Allele2, "Gencode_19_tumorSeqAllele2", "Gencode_27_tumorSeqAllele2", "alt", "alt_allele", "alt2", "alt_allele2", "alternate_allele2", "observed_allele2", "alternate_allele", "observed_allele", "alt1", "alt_allele1", "alternate_allele1", "observed_allele1"); + static final List OutputFieldNameMap_Variant_Classification = Arrays.asList(FieldName_Variant_Classification, "Gencode_19_variantClassification", "Gencode_27_variantClassification", "Gencode_28_variantClassification", "variant_classification"); + static final List OutputFieldNameMap_Variant_Type = Arrays.asList(FieldName_Variant_Type, "Gencode_19_variantType", "Gencode_27_variantType", "Gencode_28_variantType", "variant_type"); + static final List OutputFieldNameMap_Reference_Allele = Arrays.asList(FieldName_Reference_Allele, "Gencode_19_refAllele", "Gencode_27_refAllele", "Gencode_28_refAllele", "ref", "ref_allele", "reference_allele"); + static final List OutputFieldNameMap_Tumor_Seq_Allele1 = Arrays.asList(FieldName_Tumor_Seq_Allele1, "Gencode_19_tumorSeqAllele1", "Gencode_27_tumorSeqAllele1", "Gencode_28_tumorSeqAllele1", "ref", "ref_allele", "reference_allele"); + static final List OutputFieldNameMap_Tumor_Seq_Allele2 = Arrays.asList(FieldName_Tumor_Seq_Allele2, "Gencode_19_tumorSeqAllele2", "Gencode_27_tumorSeqAllele2", "Gencode_28_tumorSeqAllele2", "alt", "alt_allele", "alt2", "alt_allele2", "alternate_allele2", "observed_allele2", "alternate_allele", "observed_allele", "alt1", "alt_allele1", "alternate_allele1", "observed_allele1"); static final List OutputFieldNameMap_dbSNP_RS = 
Arrays.asList(FieldName_dbSNP_RS, "dbsnp_rs", "dbSNP_RSPOS"); static final List OutputFieldNameMap_dbSNP_Val_Status = Arrays.asList(FieldName_dbSNP_Val_Status, "dbsnp_val_status", "dbSNP_VLD"); static final List OutputFieldNameMap_Tumor_Sample_Barcode = Arrays.asList(FieldName_Tumor_Sample_Barcode, "tumor_barcode", "tumor_id", "case_barcode", "case_id", "tumor_name"); @@ -190,15 +190,15 @@ class MafOutputRendererConstants { static final List OutputFieldNameMap_Sequencer = Arrays.asList(FieldName_Sequencer, "sequencer", "platform"); static final List OutputFieldNameMap_Tumor_Sample_UUID = Arrays.asList(FieldName_Tumor_Sample_UUID, "tumor_uuid", "case_uuid", "tumor_barcode", "tumor_id", "case_barcode", "case_id", "tumor_name", "Tumor_Sample_Barcode"); static final List OutputFieldNameMap_Matched_Norm_Sample_UUID = Arrays.asList(FieldName_Matched_Norm_Sample_UUID, "normal_uuid", "control_uuid", "normal_barcode", "normal_id", "control_barcode", "control_id", "normal_name", "sample_name", "Matched_Norm_Sample_Barcode"); - static final List OutputFieldNameMap_Genome_Change = Arrays.asList(FieldName_Genome_Change, "Gencode_19_genomeChange", "Gencode_27_genomeChange", "genome_change"); - static final List OutputFieldNameMap_Annotation_Transcript = Arrays.asList(FieldName_Annotation_Transcript, "Gencode_19_annotationTranscript", "Gencode_27_annotationTranscript", "annotation_transcript", "transcript_id"); - static final List OutputFieldNameMap_Transcript_Strand = Arrays.asList(FieldName_Transcript_Strand, "Gencode_19_transcriptStrand", "Gencode_27_transcriptStrand", "transcript_strand"); - static final List OutputFieldNameMap_Transcript_Exon = Arrays.asList(FieldName_Transcript_Exon, "Gencode_19_transcriptExon", "Gencode_27_transcriptExon", "transcript_exon"); - static final List OutputFieldNameMap_Transcript_Position = Arrays.asList(FieldName_Transcript_Position, "Gencode_19_transcriptPos", "Gencode_27_transcriptPos", "transcript_position"); - static final List 
OutputFieldNameMap_cDNA_Change = Arrays.asList(FieldName_cDNA_Change, "Gencode_19_cDnaChange", "Gencode_27_cDnaChange", "transcript_change"); - static final List OutputFieldNameMap_Codon_Change = Arrays.asList(FieldName_Codon_Change, "Gencode_19_codonChange", "Gencode_27_codonChange", "codon_change"); - static final List OutputFieldNameMap_Protein_Change = Arrays.asList(FieldName_Protein_Change, "Gencode_19_proteinChange", "Gencode_27_proteinChange", "protein_change"); - static final List OutputFieldNameMap_Other_Transcripts = Arrays.asList(FieldName_Other_Transcripts, "Gencode_19_otherTranscripts", "Gencode_27_otherTranscripts", "other_transcripts"); + static final List OutputFieldNameMap_Genome_Change = Arrays.asList(FieldName_Genome_Change, "Gencode_19_genomeChange", "Gencode_27_genomeChange", "Gencode_28_genomeChange", "genome_change"); + static final List OutputFieldNameMap_Annotation_Transcript = Arrays.asList(FieldName_Annotation_Transcript, "Gencode_19_annotationTranscript", "Gencode_27_annotationTranscript", "Gencode_28_annotationTranscript", "annotation_transcript", "transcript_id"); + static final List OutputFieldNameMap_Transcript_Strand = Arrays.asList(FieldName_Transcript_Strand, "Gencode_19_transcriptStrand", "Gencode_27_transcriptStrand", "Gencode_28_transcriptStrand", "transcript_strand"); + static final List OutputFieldNameMap_Transcript_Exon = Arrays.asList(FieldName_Transcript_Exon, "Gencode_19_transcriptExon", "Gencode_27_transcriptExon", "Gencode_28_transcriptExon", "transcript_exon"); + static final List OutputFieldNameMap_Transcript_Position = Arrays.asList(FieldName_Transcript_Position, "Gencode_19_transcriptPos", "Gencode_27_transcriptPos", "Gencode_28_transcriptPos", "transcript_position"); + static final List OutputFieldNameMap_cDNA_Change = Arrays.asList(FieldName_cDNA_Change, "Gencode_19_cDnaChange", "Gencode_27_cDnaChange", "Gencode_28_cDnaChange", "transcript_change"); + static final List OutputFieldNameMap_Codon_Change = 
Arrays.asList(FieldName_Codon_Change, "Gencode_19_codonChange", "Gencode_27_codonChange", "Gencode_28_codonChange", "codon_change"); + static final List OutputFieldNameMap_Protein_Change = Arrays.asList(FieldName_Protein_Change, "Gencode_19_proteinChange", "Gencode_27_proteinChange", "Gencode_28_proteinChange", "protein_change"); + static final List OutputFieldNameMap_Other_Transcripts = Arrays.asList(FieldName_Other_Transcripts, "Gencode_19_otherTranscripts", "Gencode_27_otherTranscripts", "Gencode_28_otherTranscripts", "other_transcripts"); static final List OutputFieldNameMap_Refseq_mRNA_Id = Arrays.asList(FieldName_Refseq_mRNA_Id, "Gencode_XRefSeq_mRNA_id", "gencode_xref_refseq_mRNA_id", "ENSEMBL_RefSeq_mRNA_accession", "RefSeq_mRNA_Id", "HGNC_RefSeq IDs"); static final List OutputFieldNameMap_Refseq_prot_Id = Arrays.asList(FieldName_Refseq_prot_Id, "Gencode_XRefSeq_prot_acc", "gencode_xref_refseq_prot_acc", "ENSEMBL_RefSeq_protein_accession", "RefSeq_prot_Id"); static final List OutputFieldNameMap_SwissProt_acc_Id = Arrays.asList(FieldName_SwissProt_acc_Id, "Simple_Uniprot_uniprot_accession", "uniprot_accession", "UniProt_uniprot_accession"); @@ -221,8 +221,8 @@ class MafOutputRendererConstants { static final List OutputFieldNameMap_TCGAscape_Amplification_Peaks = Arrays.asList(FieldName_TCGAscape_Amplification_Peaks, "TCGAScape_Amplification_Peaks"); static final List OutputFieldNameMap_TCGAscape_Deletion_Peaks = Arrays.asList(FieldName_TCGAscape_Deletion_Peaks, "TCGAScape_Deletion_Peaks"); static final List OutputFieldNameMap_DrugBank = Arrays.asList(FieldName_DrugBank, "Simple_Uniprot_DrugBank", "UniProt_DrugBank"); - static final List OutputFieldNameMap_ref_context = Arrays.asList(FieldName_ref_context, "Gencode_19_referenceContext", "Gencode_27_referenceContext", "ref_context"); - static final List OutputFieldNameMap_gc_content = Arrays.asList(FieldName_gc_content, "Gencode_19_gcContent", "Gencode_27_gcContent", "gc_content"); + static final List 
OutputFieldNameMap_ref_context = Arrays.asList(FieldName_ref_context, "Gencode_19_referenceContext", "Gencode_27_referenceContext", "Gencode_28_referenceContext", "ref_context"); + static final List OutputFieldNameMap_gc_content = Arrays.asList(FieldName_gc_content, "Gencode_19_gcContent", "Gencode_27_gcContent", "Gencode_28_gcContent", "gc_content"); static final List OutputFieldNameMap_CCLE_ONCOMAP_overlapping_mutations = Arrays.asList(FieldName_CCLE_ONCOMAP_overlapping_mutations, "CCLE_By_GP_overlapping_mutations"); static final List OutputFieldNameMap_CCLE_ONCOMAP_total_mutations_in_gene = Arrays.asList(FieldName_CCLE_ONCOMAP_total_mutations_in_gene, "CCLE_By_Gene_total_mutations_in_gene"); static final List OutputFieldNameMap_CGC_Mutation_Type = Arrays.asList(FieldName_CGC_Mutation_Type, "CGC_Mutation Type"); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java index 10725a8495e..ccacfac2fa2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/vcfOutput/VcfOutputRenderer.java @@ -5,11 +5,8 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.*; -import org.apache.commons.lang3.StringUtils; -import org.broadinstitute.hellbender.tools.funcotator.DataSourceFuncotationFactory; -import org.broadinstitute.hellbender.tools.funcotator.Funcotation; -import org.broadinstitute.hellbender.tools.funcotator.Funcotator; -import org.broadinstitute.hellbender.tools.funcotator.OutputRenderer; +import org.apache.commons.lang.StringUtils; +import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import 
org.broadinstitute.hellbender.utils.Utils; @@ -40,7 +37,21 @@ public class VcfOutputRenderer extends OutputRenderer { /** * The delimiter for the `Other Transcript` field within the Funcotation annotation in the VCF. */ - public static final String OTHER_TRANSCRIPT_DELIMITER = ";"; + public static final String OTHER_TRANSCRIPT_DELIMITER = "/"; + + /** + * The delimiter to use when separating the information regarding a transcript. + */ + public static final String ALL_TRANSCRIPT_DELIMITER = "#"; + + /** + * The delimiter to use when starting the information regarding a transcript. + */ + public static final String START_TRANSCRIPT_DELIMITER = "["; + /** + * The delimiter to use when ending the information regarding a transcript. + */ + public static final String END_TRANSCRIPT_DELIMITER = "]"; //================================================================================================================== @@ -100,7 +111,7 @@ public void close() { } @Override - public void write(final VariantContext variant, final List funcotations) { + public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) { // Create a new variant context builder: final VariantContextBuilder variantContextOutputBuilder = new VariantContextBuilder(variant); @@ -128,13 +139,20 @@ public void write(final VariantContext variant, final List funcotat funcotatorAnnotationStringBuilder.append(FIELD_DELIMITER); } - funcotatorAnnotationStringBuilder.append( - funcotations.stream() - .filter(f -> f.getAltAllele().equals(altAllele) ) - .map(f -> retrieveSanitizedFuncotation(f, manualAnnotationSerializedString)) - .collect(Collectors.joining(FIELD_DELIMITER)) - ); - funcotatorAnnotationStringBuilder.append(","); + for (final String txId : txToFuncotationMap.keyList()) { + funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER); + final List funcotations = txToFuncotationMap.get(txId); + funcotatorAnnotationStringBuilder.append( + funcotations.stream() + 
.filter(f -> f.getAltAllele().equals(altAllele)) + .map(f -> retrieveSanitizedFuncotation(f, manualAnnotationSerializedString)) + .collect(Collectors.joining(FIELD_DELIMITER)) + ); + funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER); + } + // We have a trailing "#" - we need to remove it: + funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length()-1); + funcotatorAnnotationStringBuilder.append(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR); } // We have a trailing "," - we need to remove it: @@ -168,12 +186,16 @@ private VCFHeader createVCFHeader() { final String dataSourceFields = getDataSourceFieldNamesForHeader(dataSourceFactories); final String manualAnnotationFields = String.join( HEADER_LISTED_FIELD_DELIMITER, manualAnnotations.keySet() ); + // Construct (only) the field list delimited by HEADER_LISTED_FIELD_DELIMITER + final String delimitedFields = StringUtils.isEmpty(manualAnnotationFields) ? dataSourceFields : + manualAnnotationFields + HEADER_LISTED_FIELD_DELIMITER + dataSourceFields; + // Add in the lines about Funcotations: headerLines.addAll(defaultToolVcfHeaderLines); headerLines.add(new VCFHeaderLine("Funcotator Version", Funcotator.VERSION + " | " + getDataSourceInfoString())); headerLines.add(new VCFInfoHeaderLine(FUNCOTATOR_VCF_FIELD_NAME, VCFHeaderLineCount.A, VCFHeaderLineType.String, "Functional annotation from the Funcotator tool. 
Funcotation fields are: " + - manualAnnotationFields + HEADER_LISTED_FIELD_DELIMITER + dataSourceFields) + delimitedFields) ); // Create a new header and preserve the genotype sample names: @@ -196,7 +218,7 @@ private static String getDataSourceFieldNamesForHeader(final List pik3caFeatureReader = AbstractFeatureReader.getFeatureReader( FuncotatorTestConstants.PIK3CA_GENCODE_ANNOTATIONS_FILE_NAME, new GencodeGtfCodec() ); + private static final FeatureReader muc16FeatureReader = AbstractFeatureReader.getFeatureReader(FuncotatorTestConstants.MUC16_GENCODE_ANNOTATIONS_FILE_NAME, new GencodeGtfCodec() ); + @DataProvider + public Object[][] provideCreationFromFuncotationVcfHeaderString() { + return new Object[][] { + { + "Functional annotation from the Funcotator tool. Funcotation fields are: Gencode_19_hugoSymbol|Gencode_19_ncbiBuild|Gencode_19_chromosome", + "[FOO|hg19|4]", + Arrays.asList(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY), + Arrays.asList(ImmutableSortedMap.of("Gencode_19_hugoSymbol", "FOO", "Gencode_19_ncbiBuild", "hg19", "Gencode_19_chromosome", "4")) + }, { + "Functional annotation from the Funcotator tool. Funcotation fields are: Gencode_19_hugoSymbol|Gencode_19_ncbiBuild|Gencode_19_chromosome", + "[FOO|hg19|]", + Arrays.asList(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY), + Arrays.asList(ImmutableSortedMap.of("Gencode_19_hugoSymbol", "FOO", "Gencode_19_ncbiBuild", "hg19", "Gencode_19_chromosome", "")) + }, { + "Functional annotation from the Funcotator tool. Funcotation fields are: Gencode_19_hugoSymbol|Gencode_19_ncbiBuild|Gencode_19_chromosome", + "[|hg19|4]", + Arrays.asList(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY), + Arrays.asList(ImmutableSortedMap.of("Gencode_19_hugoSymbol", "", "Gencode_19_ncbiBuild", "hg19", "Gencode_19_chromosome", "4")) + }, { + "Functional annotation from the Funcotator tool. 
Funcotation fields are: Gencode_19_hugoSymbol|Gencode_19_ncbiBuild|Gencode_19_annotationTranscript", + "[FOO|hg19|txID1]#[BAR|hg38|txID2]", + Arrays.asList("txID1", "txID2"), + Arrays.asList( + ImmutableSortedMap.of("Gencode_19_hugoSymbol", "FOO", "Gencode_19_ncbiBuild", "hg19", "Gencode_19_annotationTranscript", "txID1"), + ImmutableSortedMap.of("Gencode_19_hugoSymbol", "BAR", "Gencode_19_ncbiBuild", "hg38", "Gencode_19_annotationTranscript", "txID2") + ) + }, { + "Functional annotation from the Funcotator tool. Funcotation fields are: Gencode_19_hugoSymbol|Gencode_19_ncbiBuild|Gencode_19_annotationTranscript", + "[FOO|hg19|txID1]#[BAR|hg38|txID2]#[BAZ|hg38|txID3]", + Arrays.asList("txID1", "txID2", "txID3"), + Arrays.asList( + ImmutableSortedMap.of("Gencode_19_hugoSymbol", "FOO", "Gencode_19_ncbiBuild", "hg19", "Gencode_19_annotationTranscript", "txID1"), + ImmutableSortedMap.of("Gencode_19_hugoSymbol", "BAR", "Gencode_19_ncbiBuild", "hg38", "Gencode_19_annotationTranscript", "txID2"), + ImmutableSortedMap.of("Gencode_19_hugoSymbol", "BAZ", "Gencode_19_ncbiBuild", "hg38", "Gencode_19_annotationTranscript", "txID3") + ) + } + }; + } + + @Test(dataProvider = "provideCreationFromFuncotationVcfHeaderString") + public void testCreationFromFuncotationVcfHeaderString(final String headerDescription, final String funcotationValue, final List gtTranscriptIDs, final List> gtMaps) { + final Allele dummyAllele = Allele.create("A"); + final FuncotationMap testMap = FuncotationMap.createAsAllTableFuncotationsFromVcf("Gencode_19_annotationTranscript", + FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(headerDescription), + funcotationValue, dummyAllele, "TEST"); + final List transcriptIds = testMap.keyList(); + Assert.assertEquals(new HashSet<>(transcriptIds), new HashSet<>(gtTranscriptIDs)); + for (int i = 0; i < gtMaps.size(); i++){ + final SortedMap gtMap = gtMaps.get(i); + final String transcriptId = transcriptIds.get(i); + 
Assert.assertEquals(testMap.get(transcriptId).get(0).getFieldNames().size(), gtMap.keySet().size()); + Assert.assertEquals(testMap.get(transcriptId).size(), 1); // We have one funcotation with X fields. + Assert.assertTrue(gtMap.keySet().stream().allMatch(k -> gtMap.get(k).equals(testMap.getFieldValue(transcriptId, k, dummyAllele)))); + } + } + + @DataProvider + public Object[][] provideGencodeFuncotationCreation() { + // All of these were chosen not to be IGR. + return new Object[][] { + {"chr3", 178916538, 178916538, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(), + ReferenceDataSource.of( IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref())), + pik3caFeatureReader, DS_PIK3CA_HG19_GENCODE_FASTA, + TranscriptSelectionMode.ALL, Arrays.asList("ENST00000263967.3") + },{"chr3", 178916538, 178916538, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(), + ReferenceDataSource.of( IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref())), + pik3caFeatureReader, DS_PIK3CA_HG19_GENCODE_FASTA, + TranscriptSelectionMode.CANONICAL, Arrays.asList("ENST00000263967.3") + },{"chr19", 8994200, 8994200, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(), + ReferenceDataSource.of( IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref())), + muc16FeatureReader, DS_MUC16_HG19_GENCODE_FASTA, + TranscriptSelectionMode.ALL, Arrays.asList("ENST00000397910.4", "ENST00000380951.5") + + // Next one tests where we would be in a gene with more than one basic transcript, but variant only overlaps one. And we still ask for all, + // but since one is IGR, it will never get added the the FuncotationMap. 
+ }, {"chr19", 9014550, 9014550, "T", "A", FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(), + ReferenceDataSource.of(IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref())), + muc16FeatureReader, DS_MUC16_HG19_GENCODE_FASTA, + TranscriptSelectionMode.ALL, Arrays.asList("ENST00000397910.4") + + // Next one tests where we would be in a gene with more than one basic transcript, variant overlaps both, but we are in canonical mode. + },{"chr19", 8994200, 8994200, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(), + ReferenceDataSource.of( IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref())), + muc16FeatureReader, DS_MUC16_HG19_GENCODE_FASTA, + TranscriptSelectionMode.CANONICAL, Arrays.asList("ENST00000397910.4") + + // Next one tests where we would be in a gene with more than one basic transcript, variant overlaps both, but we are in effect mode. + },{"chr19", 8994200, 8994200, "G", "C", FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(), + ReferenceDataSource.of( IOUtils.getPath(FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref())), + muc16FeatureReader, DS_MUC16_HG19_GENCODE_FASTA, + TranscriptSelectionMode.BEST_EFFECT, Arrays.asList("ENST00000397910.4") + } + }; + } + @Test(dataProvider = "provideGencodeFuncotationCreation") + public void testGencodeFuncotationCreation(final String contig, + final int start, + final int end, + final String ref, + final String alt, + final String referenceFileName, + final ReferenceDataSource referenceDataSource, + final FeatureReader featureReader, + final String transcriptFastaFile, + final TranscriptSelectionMode transcriptSelectionMode, + final List gtTranscripts) { + + final SimpleInterval variantInterval = new SimpleInterval( contig, start, end ); + + final Allele refAllele = Allele.create(ref, true); + final Allele altAllele = Allele.create(alt); + + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder( + referenceFileName, + contig, + start, + end, + 
Arrays.asList(refAllele, altAllele) + ); + final VariantContext variantContext = variantContextBuilder.make(); + + final ReferenceContext referenceContext = new ReferenceContext(referenceDataSource, variantInterval ); + + // Get our gene feature iterator: + final CloseableTribbleIterator gtfFeatureIterator; + try { + gtfFeatureIterator = featureReader.query(contig, start, end); + } + catch (final IOException ex) { + throw new GATKException("Could not finish the test!", ex); + } + final List featureList = Collections.singletonList(gtfFeatureIterator.next()); + + final String gencode_test = "GENCODE_TEST"; + final GencodeFuncotationFactory gencodeFactory = new GencodeFuncotationFactory(Paths.get(transcriptFastaFile), + "TEST", gencode_test, transcriptSelectionMode, new HashSet<>(), new LinkedHashMap<>(), + true); + + final List gencodeFuncotations = + gencodeFactory.createFuncotations(variantContext, referenceContext, Collections.singletonMap(gencode_test, featureList)).stream() + .map(f -> (GencodeFuncotation) f).collect(Collectors.toList()); + + final FuncotationMap funcotationMap = FuncotationMap.createFromGencodeFuncotations(gencodeFuncotations); + + Assert.assertEquals(funcotationMap.keyList(), gtTranscripts); + Assert.assertTrue(funcotationMap.keyList().stream().allMatch(k -> funcotationMap.get(k).size() == 1)); + Assert.assertTrue(funcotationMap.keyList().stream() + .noneMatch(k -> ((GencodeFuncotation) funcotationMap.get(k).get(0)).getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR) )); + Assert.assertTrue(funcotationMap.keyList().stream() + .noneMatch(k -> ((GencodeFuncotation) funcotationMap.get(k).get(0)).getVariantClassification().equals(GencodeFuncotation.VariantClassification.COULD_NOT_DETERMINE) )); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index 
a4a6c5fe97b..df2b466dd02 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -1,14 +1,22 @@ package org.broadinstitute.hellbender.tools.funcotator; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; +import org.broadinstitute.hellbender.tools.funcotator.mafOutput.MafOutputRendererConstants; +import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder; import org.broadinstitute.hellbender.utils.test.FuncotatorReferenceTestUtils; import org.broadinstitute.hellbender.utils.test.IntegrationTestSpec; +import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -17,9 +25,10 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; + +import static org.broadinstitute.hellbender.tools.funcotator.FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription; /** * An integration test for the {@link Funcotator} tool. 
@@ -34,17 +43,23 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest { // Whether to do debug output (i.e. leave output around). // This should always be false when checked in. - private static final boolean doDebugTests = false; - private static final String LARGE_DATASOURCES_FOLDER = "funcotator_dataSources_latest"; + private static final boolean doDebugTests = false; + private static final String LARGE_DATASOURCES_FOLDER = "funcotator_dataSources_latest"; private static final String PIK3CA_VCF_HG19 = toolsTestDir + "funcotator/0816201804HC0_R01C01.pik3ca.vcf"; private static final String PIK3CA_VCF_HG38 = toolsTestDir + "funcotator/hg38_trio.pik3ca.vcf"; - private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator/small_ds/"; + private static final String PIK3CA_VCF_HG19_SNPS = toolsTestDir + "funcotator/PIK3CA_SNPS_3.vcf"; + private static final String PIK3CA_VCF_HG19_INDELS = toolsTestDir + "funcotator/PIK3CA_INDELS_3.vcf"; + private static final String MUC16_VCF_HG19 = toolsTestDir + "funcotator/MUC16_MNP.vcf"; + private static final String PIK3CA_VCF_HG19_ALTS = toolsTestDir + "funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf"; + private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator/small_ds_pik3ca/"; + private static final String DS_MUC16_DIR = largeFileTestDir + "funcotator/small_ds_muc16/"; private static String hg38Chr3Ref; private static String b37Chr3Ref; private static String hg19Chr3Ref; private static String hg19Chr19Ref; + static { if (!doDebugTests) { tmpOutDir = createTempDir("funcotatorTmpFolder"); @@ -440,4 +455,139 @@ public void testCanAnnotateHg38ClinvarAndGencodeV28() { .filter(vc -> StringUtils.contains(vc.getAttributeAsString("FUNCOTATION", ""), "MedGen")) .count(), NUM_CLINVAR_HITS); } + + @DataProvider(name = "provideForMafVcfConcordanceProteinChange") + final Object[][] provideForMafVcfConcordanceProteinChange() { + return new Object[][]{ + {PIK3CA_VCF_HG19_SNPS, b37Chr3Ref, 
FuncotatorTestConstants.REFERENCE_VERSION_HG19, Arrays.asList("Gencode_19_proteinChange"), DS_PIK3CA_DIR, 15}, + {PIK3CA_VCF_HG19_INDELS, b37Chr3Ref, FuncotatorTestConstants.REFERENCE_VERSION_HG19, Arrays.asList("Gencode_19_proteinChange"), DS_PIK3CA_DIR, 57}, + {MUC16_VCF_HG19, hg19Chr19Ref, FuncotatorTestConstants.REFERENCE_VERSION_HG19, Arrays.asList("Gencode_19_proteinChange"), DS_MUC16_DIR, 2057} + }; + } + + /** + * Make sure that VCFs and MAFs have exactly the same protein change strings. This test does not look for + * multiallelics. This test is really only meant to test the rendering itself. + */ + @Test(dataProvider = "provideForMafVcfConcordanceProteinChange") + public void testVcfMafConcordanceForProteinChange(final String inputVcf, final String inputRef, + final String funcotatorRef, final List annotationsToCheck, + final String datasourceDir, + final int gtNumVariants) { + final FuncotatorArgumentDefinitions.OutputFormatType vcfOutputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF; + final File vcfOutputFile = getOutputFile(vcfOutputFormatType); + + final ArgumentsBuilder argumentsVcf = new ArgumentsBuilder(); + + argumentsVcf.addVCF(new File(inputVcf)); + argumentsVcf.addOutput(vcfOutputFile); + argumentsVcf.addReference(new File(inputRef)); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, datasourceDir); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, funcotatorRef); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, vcfOutputFormatType.toString()); + argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.REMOVE_FILTERED_VARIANTS_LONG_NAME, false); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_LONG_NAME, TranscriptSelectionMode.CANONICAL.toString()); + + // We need this argument since we are testing on a subset of b37 + 
argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME, true); + runCommandLine(argumentsVcf); + + final FuncotatorArgumentDefinitions.OutputFormatType mafOutputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.MAF; + final File mafOutputFile = getOutputFile(mafOutputFormatType); + + final ArgumentsBuilder argumentsMaf = new ArgumentsBuilder(); + + argumentsMaf.addVCF(new File(inputVcf)); + argumentsMaf.addOutput(mafOutputFile); + argumentsMaf.addReference(new File(inputRef)); + argumentsMaf.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, datasourceDir); + argumentsMaf.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, funcotatorRef); + argumentsMaf.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, mafOutputFormatType.toString()); + argumentsMaf.addBooleanArgument(FuncotatorArgumentDefinitions.REMOVE_FILTERED_VARIANTS_LONG_NAME, false); + argumentsMaf.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME, true); + argumentsMaf.addArgument(FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_LONG_NAME, TranscriptSelectionMode.CANONICAL.toString()); + + runCommandLine(argumentsMaf); + + final Pair> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(vcfOutputFile.getAbsolutePath()); + final List variantContexts = vcfInfo.getRight(); + final VCFHeader vcfHeader = vcfInfo.getLeft(); + final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME); + + Assert.assertTrue(variantContexts.stream().allMatch(v -> v.hasAttribute(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME))); + + final AnnotatedIntervalCollection maf = AnnotatedIntervalCollection.create(mafOutputFile.toPath(), null); + Assert.assertEquals(maf.getRecords().size(), gtNumVariants); + Assert.assertTrue(maf.getRecords().stream() + .anyMatch(v -> 
!v.getAnnotationValue(MafOutputRendererConstants.FieldName_Variant_Classification).equals("IGR"))); + Assert.assertTrue(maf.getRecords().stream() + .anyMatch(v -> v.getAnnotationValue(MafOutputRendererConstants.FieldName_Variant_Classification).equals("Missense_Mutation") || + v.getAnnotationValue(MafOutputRendererConstants.FieldName_Variant_Classification).startsWith("Frame_Shift"))); + + // Get the protein changes: + final String[] funcotationKeys = extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); + + for (final String annotationToCheck: annotationsToCheck) { + final List mafProteinChanges = maf.getRecords().stream().map(v -> v.getAnnotationValue(MafOutputRendererConstants.FieldName_Protein_Change)).collect(Collectors.toList()); + + // Note that we assume that each variant context has one allele and one transcript. This is true due to the + // datasources and input VCF. + // Don't try to refactor this for-loop to a stream here. + final List vcfProteinChanges = new ArrayList<>(); + for (final VariantContext v: variantContexts) { + final Map alleleFuncotationMapMap = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(funcotationKeys, v, "Gencode_19_annotationTranscript"); + final Allele alternateAllele = v.getAlternateAllele(0); + final FuncotationMap funcotationMap = alleleFuncotationMapMap.get(alternateAllele); + vcfProteinChanges.add(funcotationMap.getFieldValue(funcotationMap.keyList().get(0), annotationToCheck, alternateAllele)); + } + Assert.assertEquals(mafProteinChanges, vcfProteinChanges, "Failed matching " + annotationToCheck); + } + } + + @Test + public void testVcfDatasourceAccountsForAltAlleles() { + final FuncotatorArgumentDefinitions.OutputFormatType vcfOutputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF; + final File vcfOutputFile = getOutputFile(vcfOutputFormatType); + + final ArgumentsBuilder argumentsVcf = new ArgumentsBuilder(); + + argumentsVcf.addVCF(new 
File(PIK3CA_VCF_HG19_ALTS)); + argumentsVcf.addOutput(vcfOutputFile); + argumentsVcf.addReference(new File(b37Chr3Ref)); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, DS_PIK3CA_DIR); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, FuncotatorTestConstants.REFERENCE_VERSION_HG19); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, vcfOutputFormatType.toString()); + argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.REMOVE_FILTERED_VARIANTS_LONG_NAME, false); + + // We need this argument since we are testing on a subset of b37 + argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME, true); + runCommandLine(argumentsVcf); + + final Pair> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(vcfOutputFile.getAbsolutePath()); + final List variantContexts = vcfInfo.getRight(); + Assert.assertTrue(variantContexts.size() > 0); + final VCFHeader vcfHeader = vcfInfo.getLeft(); + final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME); + final String[] funcotationKeys = extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); + + // The first variant context should have clinvar annotations, since it hit on the alt allele. None of the rest. + // This test assumes that each test variant context has only one alt allele. + // The rest should not have any clinvar hits. + for (int i = 0; i < variantContexts.size(); i++) { + final String gtString = (i == 0) ? 
"MedGen:C0027672,SNOMED_CT:699346009" : ""; + final Map alleleToFuncotationMap = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(funcotationKeys, variantContexts.get(i), "Gencode_19_annotationTranscript"); + Assert.assertEquals(alleleToFuncotationMap.entrySet().size(), 1); + + final FuncotationMap funcotationMap = alleleToFuncotationMap.values().iterator().next(); + Assert.assertEquals(funcotationMap.keyList().size(), 1); + Assert.assertTrue(funcotationMap.keyList().stream().noneMatch(k -> k.equals(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY))); + Assert.assertTrue(funcotationMap.keyList().stream().noneMatch(k -> StringUtils.isEmpty(k))); + final List funcotations = funcotationMap.get(funcotationMap.keyList().get(0)); + Assert.assertEquals(funcotations.size(), 1); + final Funcotation funcotation = funcotations.get(0); + + Assert.assertEquals(funcotation.getField("dummy_ClinVar_VCF_CLNDISDB"), FuncotatorUtils.sanitizeFuncotationForVcf(gtString)); + } + } } + diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java index eac40a81adc..1044eab9185 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java @@ -68,7 +68,7 @@ public class GencodeFuncotationFactoryUnitTest extends GATKBaseTest { GencodeFuncotationFactory.DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, new HashSet<>(), - new LinkedHashMap<>()); + new LinkedHashMap<>(), false); } //================================================================================================================== @@ -1156,7 +1156,7 @@ void testMuc16SnpCreateFuncotations(final 
int chromosomeNumber, GencodeFuncotationFactory.DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, requestedTranscriptIds, - new LinkedHashMap<>())) { + new LinkedHashMap<>(), true)) { // Generate our funcotations: final List featureList = new ArrayList<>(); @@ -1213,7 +1213,7 @@ void createNonBasicFuncotations(final int start, final int end) { GencodeFuncotationFactory.DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, new HashSet<>(), - new LinkedHashMap<>())) { + new LinkedHashMap<>(), false)) { // Generate our funcotations: final List featureList = new ArrayList<>(); @@ -1283,7 +1283,7 @@ void testCreateFuncotations(final String expectedGeneName, GencodeFuncotationFactory.DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, requestedTranscriptIds, - new LinkedHashMap<>())) { + new LinkedHashMap<>(), true)) { final List featureList = new ArrayList<>(); featureList.add( gene ); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java index cc18884ebdd..801a8b16712 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationUnitTest.java @@ -95,127 +95,127 @@ Object[][] provideDataForTestSerializationOverrides() { overrideVal + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 
+ D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_ncbiBuild", overrideVal), "TESTGENE" + D + overrideVal + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_chromosome", overrideVal), "TESTGENE" + D + "BUILD1" + D + overrideVal + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_start", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + overrideVal + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 
+ D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_end", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + overrideVal + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_variantClassification", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + overrideVal + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_secondaryVariantClassification", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + overrideVal + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + 
"big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_variantType", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + overrideVal + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_refAllele", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + overrideVal + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_tumorSeqAllele1", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + 
GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + overrideVal + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_tumorSeqAllele2", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + overrideVal + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_genomeChange", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + overrideVal + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_annotationTranscript", 
overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + overrideVal + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_transcriptStrand", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - overrideVal + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + overrideVal + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_transcriptExon", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + overrideVal + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + overrideVal + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" 
+ VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_transcriptPos", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + overrideVal + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + overrideVal + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_cDnaChange", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + overrideVal + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + overrideVal + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_codonChange", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + overrideVal + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D 
+ "1" + D + 1 + D + "A" + D + overrideVal + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_proteinChange", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + overrideVal + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + overrideVal + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_gcContent", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + overrideVal + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + overrideVal + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_gcContent", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + 
"T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + overrideVal + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + overrideVal + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { setFuncotationFieldOverride(gencodeFuncotation, "Gencode_TEST_VERSION_otherTranscripts", overrideVal), "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + @@ -242,7 +242,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "CRUMB BUM!!!!!" + "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, // All fields: { @@ -254,7 +254,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation(null, "BUILD1", "chr1", 1, 100, @@ -265,7 +265,7 @@ 
Object[][] createGencodeFuncotationsAndStringSerializations() { D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", null, "chr1", 1, 100, @@ -276,7 +276,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", null, 50, 60, @@ -287,7 +287,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + D + 50 + D + 60 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D 
+ "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -298,7 +298,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -309,7 +309,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -320,7 +320,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + 
"ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -331,7 +331,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "G" + D + "G" + D + "C" + D + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ACC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ACC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -342,7 +342,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + D + - D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -353,7 +353,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + 
GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + D + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + D + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -364,7 +364,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + D + "ATC" + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -375,7 +375,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + D + "Lys" + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ 
-386,7 +386,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + D + "1.0" + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + D + "1.0" + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -397,7 +397,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + D + "ATGCGCAT" + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + D + "ATGCGCAT" + D + "ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, { createGencodeFuncotation("TESTGENE", "BUILD1", "chr1", 1, 100, @@ -408,7 +408,7 @@ Object[][] createGencodeFuncotationsAndStringSerializations() { "TESTGENE" + D + "BUILD1" + D + "chr1" + D + 1 + D + 100 + D + GencodeFuncotation.VariantClassification.NONSENSE + D + GencodeFuncotation.VariantClassification.INTRON + D + GencodeFuncotation.VariantType.SNP + D + "A" + D + "A" + D + "T" + D + "big changes" + D + "T1" + D + - "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + D + "ONE;TWO;THREE" + "3'" + D + "1" + D + 1 + D + "A" + D + "ATC" + D + "Lys" + D + "1.0" + D + D + 
"ONE" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "TWO" + VcfOutputRenderer.OTHER_TRANSCRIPT_DELIMITER + "THREE" }, }; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java index 193ac5f05c3..662b404b095 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactoryUnitTest.java @@ -165,19 +165,11 @@ private Object[][] provideForTestCreateFuncotationsOnVariant() { Allele.create("C"), FACTORY_NAME) ) ), - // Three overlapping VCF features: + // No overlapping VCF features, since there are no indels in dbSNP (the test datasource), so the ground truth should be a default entry, which was constructed here manually: helpProvideForTestCreateFuncotations("3", 64157, 64166, "AGAAAGGTCA", "TCTTTCCAGT", - Arrays.asList( - new TableFuncotation(FIELD_DEFAULT_MAP.keySet().stream().map(s->FACTORY_NAME + "_" + s).collect(Collectors.toList()), - Arrays.asList("true","false","0.9996,.,0.0003994","false","false","1","false","false","false","","false","false","false","false","true","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","527707435","64158","false","false","0","false","0","false","0.999966,3.4343e-005,.","false","false","false","SNV","true","0x050000000005040026000100","1","false","142"), - Allele.create("TCTTTCCAGT"), FACTORY_NAME), - new TableFuncotation(FIELD_DEFAULT_MAP.keySet().stream().map(s->FACTORY_NAME + "_" + s).collect(Collectors.toList()), - 
Arrays.asList("true","false","0.9976,0.002396","false","false","1","false","false","false","","false","false","false","false","true","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","549364629","64163","false","false","0","false","0","false","0.9966,0.00339996","false","false","false","SNV","true","0x050000000005040026000100","1","false","142"), - Allele.create("TCTTTCCAGT"), FACTORY_NAME), - new TableFuncotation(FIELD_DEFAULT_MAP.keySet().stream().map(s->FACTORY_NAME + "_" + s).collect(Collectors.toList()), - Arrays.asList("true","false","0.6466,0.3534","false","false","1","false","true","true","","true","false","false","true","true","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","9311287","64165","false","false","0","true","0","false","","false","false","false","SNV","true","0x05010000000517013f000100","1","false","119"), - Allele.create("TCTTTCCAGT"), FACTORY_NAME) - ) + Collections.singletonList(new TableFuncotation(FIELD_DEFAULT_MAP.keySet().stream().map(s->FACTORY_NAME + "_" + s).collect(Collectors.toList()), + Arrays.asList("false","false","","false","false","","false","false","false","","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","false","","","false","false","","false","","false","","false","false","false","","false","","","false",""), + Allele.create("TCTTTCCAGT"), FACTORY_NAME)) ), }; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java index 092468cae8c..4f01c172ee0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java @@ -7,10 +7,7 @@ import org.apache.commons.collections.MapUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.tools.funcotator.DataSourceFuncotationFactory; -import org.broadinstitute.hellbender.tools.funcotator.Funcotation; -import org.broadinstitute.hellbender.tools.funcotator.FuncotatorTestConstants; -import org.broadinstitute.hellbender.tools.funcotator.TranscriptSelectionMode; +import org.broadinstitute.hellbender.tools.funcotator.*; import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; @@ -111,7 +108,7 @@ private MafOutputRenderer createMafOutputRenderer(final File outputFile) { configData, new LinkedHashMap<>(), TranscriptSelectionMode.BEST_EFFECT, - new HashSet<>() + new HashSet<>(), true ); // Sort the datasources to ensure the same order every time: @@ -707,7 +704,8 @@ public void testWrite(final List variants, final List