Skip to content

Commit

Permalink
Incorporating changes from GVS to existing files (broadinstitute#8256)
Browse files Browse the repository at this point in the history
* Absorbed the changes to core GATK files from the long running GVS work.
* Based on the vs_834_deletions branch, where the code was refactored to separate the
  GVS-specific classes from the shared GATK code
* There were fairly minimal conflicts between the two except for in the new VQSR package.
  In this case we've taken the master version of those files since the GVS version is out
  of date.
* Removed some large files that don't seem to be referenced
  • Loading branch information
lbergelson authored Apr 11, 2023
1 parent 497725a commit 0374937
Show file tree
Hide file tree
Showing 17 changed files with 422 additions and 163 deletions.
3 changes: 1 addition & 2 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ src/test/resources/large/funcotator/funcotator_dataSources/dna_repair_genes/hg38
src/test/resources/large/funcotator/funcotator_dataSources/familial/hg38 -filter=lfs -diff=lfs -merge=lfs -text
src/test/resources/large/funcotator/funcotator_dataSources/hgnc/hg38 -filter=lfs -diff=lfs -merge=lfs -text
src/test/resources/large/funcotator/funcotator_dataSources/simple_uniprot/hg38 -filter=lfs -diff=lfs -merge=lfs -text

#Otherwise, track everything in large
src/test/resources/large/** filter=lfs diff=lfs merge=lfs -text
src/main/resources/large/** filter=lfs diff=lfs merge=lfs -text

*.psd filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ out/
gatkcondaenv.yml
gatkcondaenv.intel.yml
gatkPythonPackageArchive.zip
testfiles/

#Please don't commit me
client_secret.json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,17 @@ private TraversalParameters parseIntervals(final GenomeLocParser genomeLocParser
}


/**
 * Returns the interval list file given on the command line, if there is exactly one interval
 * string and {@code IntervalUtils.isGatkIntervalFile} accepts it.
 *
 * @return the single interval string when it is a GATK interval file; {@code null} when zero or
 *         several interval strings were supplied, or when the single one is not an interval file
 */
public String intervalListFileSpecified() {
    final boolean isSingleIntervalFile = getIntervalStrings().size() == 1
            && IntervalUtils.isGatkIntervalFile(getIntervalStrings().get(0));
    return isSingleIntervalFile ? getIntervalStrings().get(0) : null;
}

/**
* Have any intervals been specified for inclusion or exclusion
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public final class AnnotationUtils {
public static final String ALLELE_SPECIFIC_SPLIT_REGEX = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String BRACKET_REGEX = "\\[|\\]";
public static final String LIST_DELIMITER = ",";
public static final String MISSING_VALUE = ".";

private AnnotationUtils(){}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,58 @@ public Map<String, Object> annotateRawData(final ReferenceContext ref,
@Override
@SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
public Map<String, Object> combineRawData(List<Allele> allelesList, List<ReducibleAnnotationData<?>> listOfRawData) {
    // Parses every entry of listOfRawData, accumulates its per-allele values into one combined
    // record keyed by allelesList, and returns the resulting raw annotation string in a
    // single-entry map under getPrimaryRawKey().

    //VC already contains merged alleles from ReferenceConfidenceVariantContextMerger
    final ReducibleAnnotationData<Integer> combinedData = new AlleleSpecificAnnotationData(allelesList, null);

    for (final ReducibleAnnotationData<?> currentValue : listOfRawData) {
        // Unchecked narrowing: the values are filled in as Integers by parseRawDataString below.
        final ReducibleAnnotationData<Integer> value = (ReducibleAnnotationData<Integer>) currentValue;
        parseRawDataString(value);
        combineAttributeMap(value, combinedData);
    }

    final Map<String, Object> annotations = new HashMap<>();
    final String annotationString = makeRawAnnotationString(allelesList, combinedData.getAttributeMap());
    annotations.put(getPrimaryRawKey(), annotationString);
    return annotations;
}

/**
 * Splits the raw data string of {@code myData} on the allele-specific delimiter and stores one
 * parsed value per allele back into {@code myData}.
 *
 * The i-th token is assigned to the i-th allele of {@code myData.getAlleles()}. A token that is
 * empty or equal to {@link AnnotationUtils#MISSING_VALUE} is stored as {@code null}; any other
 * token is parsed with {@link Integer#parseInt(String)}.
 *
 * @param myData annotation data whose raw string is parsed and whose attribute map is filled in
 */
protected void parseRawDataString(final ReducibleAnnotationData<Integer> myData) {
    //one token per allele, in allele order
    final String[] tokens = myData.getRawData().split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_REGEX);
    int alleleIndex = 0;
    for (final String token : tokens) {
        final Allele allele = myData.getAlleles().get(alleleIndex);
        final Integer parsedValue;
        if (token.isEmpty() || token.equals(AnnotationUtils.MISSING_VALUE)) {
            parsedValue = null;
        } else {
            parsedValue = Integer.parseInt(token);
        }
        myData.putAttribute(allele, parsedValue);
        alleleIndex++;
    }
}

/**
 * Accumulates the per-allele values of {@code toAdd} into {@code combined}.
 *
 * Only the alleles of {@code combined} are visited. For each of them, a {@code null} value in
 * {@code toAdd} leaves {@code combined} untouched; otherwise the value is added to the existing
 * value in {@code combined}, or copied over when {@code combined} has no value yet.
 *
 * @param toAdd    data whose per-allele values are added; may cover only a subset of the alleles
 * @param combined running totals, modified in place
 */
public void combineAttributeMap(final ReducibleAnnotationData<Integer> toAdd, final ReducibleAnnotationData<Integer> combined) {
    for (final Allele currentAllele : combined.getAlleles()) {
        //combined is initialized with all alleles, but toAdd might have only a subset
        final Integer addend = toAdd.getAttribute(currentAllele);
        if (addend == null) {
            continue;
        }
        final Integer runningTotal = combined.getAttribute(currentAllele);
        if (runningTotal == null) {
            combined.putAttribute(currentAllele, addend);
        } else {
            combined.putAttribute(currentAllele, runningTotal.intValue() + addend.intValue());
        }
    }
}

/**
 * Builds the raw annotation string for the given alleles: one integer per allele, in the order
 * of {@code vcAlleles}, separated by {@link AnnotationUtils#ALLELE_SPECIFIC_RAW_DELIM}.
 *
 * @param vcAlleles       alleles to emit values for, in output order
 * @param perAlleleValues value per allele; an allele with no entry (or a {@code null} entry) is
 *                        written as {@code 0}
 * @return the delimited string, or an empty string when {@code vcAlleles} is empty
 */
private String makeRawAnnotationString(final List<Allele> vcAlleles, final Map<Allele, Integer> perAlleleValues) {
    final StringBuilder annotationString = new StringBuilder();
    for (final Allele current : vcAlleles) {
        if (annotationString.length() > 0) {
            annotationString.append(AnnotationUtils.ALLELE_SPECIFIC_RAW_DELIM);
        }
        final Integer value = perAlleleValues.get(current);
        // append(int) always writes ASCII digits; String.format("%d", ...) would localize them
        // according to the default locale.
        annotationString.append(value != null ? value.intValue() : 0);
    }
    return annotationString.toString();
}

/**
* Uses the "AS_QUAL" key, which must be computed by the genotyping engine in GenotypeGVCFs, to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,17 @@ public final class GnarlyGenotyperEngine {
private Set<Class<? extends InfoFieldAnnotation>> allASAnnotations;

private final int maxAltAllelesToOutput;
private final boolean emitPls;
private final boolean keepAllSites;
private final boolean stripASAnnotations;


/**
 * Convenience constructor that delegates to the four-argument constructor, passing {@code true}
 * for its {@code emitPls} argument.
 *
 * @param keepAllSites          forwarded unchanged to the four-argument constructor
 * @param maxAltAllelesToOutput forwarded unchanged to the four-argument constructor
 * @param stripASAnnotations    forwarded unchanged to the four-argument constructor
 */
public GnarlyGenotyperEngine(final boolean keepAllSites, final int maxAltAllelesToOutput, final boolean stripASAnnotations) {
this(keepAllSites, maxAltAllelesToOutput, true, stripASAnnotations);
}

public GnarlyGenotyperEngine(final boolean keepAllSites, final int maxAltAllelesToOutput, final boolean emitPls, final boolean stripASAnnotations) {
this.maxAltAllelesToOutput = maxAltAllelesToOutput;
this.emitPls = emitPls;
this.keepAllSites = keepAllSites;
this.stripASAnnotations = stripASAnnotations;

Expand Down Expand Up @@ -180,7 +185,7 @@ else if (variant.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
//Get AC and SB annotations
//remove the NON_REF allele and update genotypes if necessary
final int[] rawGenotypeCounts = new int[3];
final GenotypesContext calledGenotypes = iterateOnGenotypes(variant, targetAlleles, alleleCountMap, SBsum, removeNonRef, variant.hasAttribute(GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY) ? null : rawGenotypeCounts);
final GenotypesContext calledGenotypes = iterateOnGenotypes(variant, targetAlleles, alleleCountMap, SBsum, removeNonRef, emitPls, variant.hasAttribute(GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY) ? null : rawGenotypeCounts);
Integer numCalledAlleles = 0;
if (variant.hasGenotypes()) {
for (final Allele a : targetAlleles) {
Expand Down Expand Up @@ -304,9 +309,9 @@ else if (variant.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
*/
@VisibleForTesting
protected GenotypesContext iterateOnGenotypes(final VariantContext vc, final List<Allele> targetAlleles,
final Map<Allele, Integer> targetAlleleCounts, final int[] SBsum,
final boolean nonRefReturned,
final int[] rawGenotypeCounts) {
final Map<Allele, Integer> targetAlleleCounts, final int[] SBsum,
final boolean nonRefReturned, final boolean emitPLs,
final int[] rawGenotypeCounts) {
final int maxAllelesToOutput = maxAltAllelesToOutput + 1; //+1 for ref
final List<Allele> inputAllelesWithNonRef = vc.getAlleles();
if(nonRefReturned && !inputAllelesWithNonRef.get(inputAllelesWithNonRef.size()-1).equals(Allele.NON_REF_ALLELE)) {
Expand Down Expand Up @@ -347,7 +352,11 @@ else if (g.countAllele(Allele.NON_REF_ALLELE) > 0) {
}
if (g.hasPL()) {
final int[] PLs = trimPLs(g, newPLsize);
genotypeBuilder.PL(PLs);
if (emitPLs) {
genotypeBuilder.PL(PLs);
} else {
genotypeBuilder.noPL();
}
genotypeBuilder.GQ(MathUtils.secondSmallestMinusSmallest(PLs, 0));
//If GenomicsDB returns no-call genotypes like CombineGVCFs (depending on the GenomicsDBExportConfiguration),
// then we need to actually find the GT from PLs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1570,7 +1570,7 @@ public static boolean isReciprocalOverlap(final SimpleInterval interval1, final
* </p>
*
* @param input input collection of locatables, may contain duplicates.
* @param dictionary the referene dictionary.
* @param dictionary the reference dictionary.
* @param <L> the locatable type.
* @throws IllegalArgumentException if input is {@code null}.
* @return never {@code null}, but perhaps an empty map. It is guaranteed that no value in the map is an empty list upon return.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.broadinstitute.hellbender.utils.bigquery;

import com.google.cloud.bigquery.JobStatistics;
import com.google.cloud.bigquery.TableResult;

/**
 * Simple holder that pairs a BigQuery {@link TableResult} with a
 * {@link JobStatistics.QueryStatistics} object.
 *
 * Both fields are public and final and are stored exactly as passed to the constructor; no
 * validation or copying is performed.
 * NOTE(review): presumably the statistics belong to the query job that produced the result;
 * confirm against the callers.
 */
public class BigQueryResultAndStatistics {
// The query result rows, as supplied by the caller.
public final TableResult result;
// The query statistics, as supplied by the caller.
public final JobStatistics.QueryStatistics queryStatistics;

/**
 * @param result          the table result to hold
 * @param queryStatistics the query statistics to hold
 */
public BigQueryResultAndStatistics(final TableResult result, final JobStatistics.QueryStatistics queryStatistics) {
this.result = result;
this.queryStatistics = queryStatistics;
}


}
Loading

0 comments on commit 0374937

Please sign in to comment.