Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type change int -> long to prevent tranche novel variant count overflow #7864

Merged
merged 2 commits into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ public class Tranche {
final double minVQSLod; //minimum value of VQSLOD in this tranche
final double knownTiTv; //titv value of known sites in this tranche
final double novelTiTv; //titv value of novel sites in this tranche
final int numKnown; //number of known sites in this tranche
final int numNovel; //number of novel sites in this tranche
final long numKnown; //number of known sites in this tranche
final long numNovel; //number of novel sites in this tranche
final VariantRecalibratorArgumentCollection.Mode model;
final String name; //Name of the tranche

public Tranche(final String name, final double knownTiTv, final int numNovel, final double minVQSLod, final VariantRecalibratorArgumentCollection.Mode model, final double novelTiTv, final int accessibleTruthSites, final int numKnown, final int callsAtTruthSites) {
public Tranche(final String name, final double knownTiTv, final long numNovel, final double minVQSLod,
final VariantRecalibratorArgumentCollection.Mode model, final double novelTiTv,
final int accessibleTruthSites, final long numKnown, final int callsAtTruthSites) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you check the upstream call sites for uses of int to store the number of known/novel sites? If there's an int anywhere in the call chain, the overflow issues will likely persist...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found one in the input parsing and fixed it. (The truth-site counts are also stored as ints, but with the resources we use today they're way, way below Integer.MAX_VALUE, and given the size of the human genome I don't expect that to ever change.)

if ( numKnown < 0 || numNovel < 0) {
throw new GATKException("Invalid tranche - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
throw new GATKException("Invalid tranche " + name + " - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
}

if ( name == null ) {
Expand Down Expand Up @@ -104,7 +106,8 @@ public <T extends Tranche> String getTrancheString(final T prev) {
}

protected static Tranche trancheOfVariants(final List<VariantDatum> data, final int minI, final double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
long numKnown = 0, numNovel = 0;
int knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;

final double minLod = data.get(minI).lod;
for (final VariantDatum datum : data) {
Expand Down Expand Up @@ -147,8 +150,8 @@ protected static Tranche emptyTranche(final List<VariantDatum> data, final int m

final double knownTiTv = 0.0;
final double novelTiTv = 0.0;
final int numKnown = 0;
final int numNovel = 0;
final long numKnown = 0;
final long numNovel = 0;

return new Tranche("unnamed", knownTiTv, numNovel, minLod, model, novelTiTv, accessibleTruthSites, numKnown, nCallsAtTruth);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ final class TruthSensitivityTranche extends Tranche {
public TruthSensitivityTranche(
final double targetTruthSensitivity,
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand All @@ -41,9 +41,9 @@ public TruthSensitivityTranche(
public TruthSensitivityTranche(
final double targetTruthSensitivity,
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ public Double getTrancheIndex() {

public VQSLODTranche(
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand Down Expand Up @@ -178,10 +178,10 @@ public static List<TruthSensitivityTranche> mergeAndConvertTranches(final TreeMa

public static VQSLODTranche mergeAndConvertTranches(final List<VQSLODTranche> scatteredTranches, VariantRecalibratorArgumentCollection.Mode mode) {
double indexVQSLOD = scatteredTranches.get(0).minVQSLod;
int sumNumKnown = 0;
long sumNumKnown = 0;
double sumKnownTransitions = 0;
double sumKnownTransversions = 0;
int sumNumNovel = 0;
long sumNumNovel = 0;
double sumNovelTransitions = 0;
double sumNovelTransversions = 0;
int sumAccessibleTruthSites = 0;
Expand Down