From 1a2e5eee95c6a434a10f153026c0c87eba230feb Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Sat, 15 Feb 2020 22:17:03 +0200 Subject: [PATCH 1/8] - included getStop from rev'ed htsJDK. - fixes (again) #1258 which was still broken due to the call to "make()" in the presence of a mismatching "END" attribute and "getEnd()" result. --- src/main/java/picard/util/LiftoverUtils.java | 3 +- .../java/picard/util/LiftoverVcfTest.java | 159 +++++++++++------- 2 files changed, 99 insertions(+), 63 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index be01c61be9..253b6ab884 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -88,8 +88,7 @@ public static VariantContext liftVariant(final VariantContext source, final Inte // If any of the source alleles is symbolic, do not populate the END tag (protecting things like from // getting screwed up in reverse-complemented variants if (source.hasAttribute(VCFConstants.END_KEY) && builder.getAlleles().stream().noneMatch(Allele::isSymbolic)) { - // TODO: add start() and stop() methods to the builder in htsjdk and use stop() here. - builder.attribute(VCFConstants.END_KEY, builder.make().getEnd()); + builder.attribute(VCFConstants.END_KEY, (int)builder.getStop()); } else { builder.rmAttribute(VCFConstants.END_KEY); } diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index bed3a0fe12..63d118db0f 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -8,11 +8,14 @@ import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; -import htsjdk.variant.variantcontext.*; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFConstants; - import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.DataProvider; @@ -25,7 +28,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; /** * Test class for LiftoverVcf. @@ -410,8 +417,8 @@ public Iterator indelFlipData() { // TTT*/T -> AAA*/A turns into left-aligned CAA*/C at position 1 int start = CHAIN_SIZE - 3; int stop = start + 2; - builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefTTT, T)); - result_builder.start(1).stop(3).alleles(CollectionUtil.makeList(RefCAA, C)).attribute(LiftoverUtils.REV_COMPED_ALLELES, true); + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefTTT, T)).attribute(VCFConstants.END_KEY, stop); + result_builder.start(1).stop(3).alleles(CollectionUtil.makeList(RefCAA, C)).attribute(LiftoverUtils.REV_COMPED_ALLELES, true).attribute(VCFConstants.END_KEY, 3); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(result_builder.getAlleles()); @@ -420,6 +427,9 @@ public Iterator indelFlipData() { tests.add(new Object[]{builder.make(), REFERENCE, result_builder.make()}); + builder.rmAttribute(VCFConstants.END_KEY); + result_builder.rmAttribute(VCFConstants.END_KEY); + //simple insertion // T*/TTT -> A*/AAA -> turns into left-aligned C*/CAA at position 1 stop = start; @@ -964,7 +974,7 @@ public void testLeftAlignVariants(final VariantContext source, final ReferenceSe public Iterator indelNoFlipData() { final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); - final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1"); + final VariantContextBuilder resultBuilder = new VariantContextBuilder().source("test1").chr("chr1"); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1"); final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1"); final List tests = new ArrayList<>(); @@ -992,98 +1002,115 @@ public Iterator indelNoFlipData() { // trivial snp builder.source("test1"); builder.start(1).stop(1).alleles(CollectionUtil.makeList(CRef, A)); - result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(CRef, A)); + resultBuilder.start(1).stop(1).alleles(CollectionUtil.makeList(CRef, A)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); + + // trivial case indel + builder.source("testWithEND"); + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY,(int)stop); + resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY,(int)4); + genotypeBuilder.alleles(builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); + builder.genotypes(genotypeBuilder.make()); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); + builder.rmAttribute(VCFConstants.END_KEY); + resultBuilder.rmAttribute(VCFConstants.END_KEY); // trivial case indel builder.source("test2"); builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)); - result_builder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)); + resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // near end of interval indel builder.source("test3"); builder.start(537).stop(540).alleles(CollectionUtil.makeList(AAAARef, AAA)); - result_builder.start(537).stop(540).alleles(CollectionUtil.makeList(AAAARef, AAA)); + resultBuilder.start(537).stop(540).alleles(CollectionUtil.makeList(AAAARef, AAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // near end of interval snp builder.source("test4"); builder.start(540).stop(540).alleles(CollectionUtil.makeList(ARef, T)); - result_builder.start(540).stop(540).alleles(CollectionUtil.makeList(ARef, T)); + resultBuilder.start(540).stop(540).alleles(CollectionUtil.makeList(ARef, T)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // straddling chains indel - builder.source("test5"); - builder.start(538).stop(541).alleles(CollectionUtil.makeList(AAAARef, AAA)); - result_builder.start(537).stop(540).alleles(CollectionUtil.makeList(AAAARef, AAA)); + builder.source("test5_WITH_END"); + + builder.start(538).stop(541).alleles(CollectionUtil.makeList(AAAARef, AAA)).attribute(VCFConstants.END_KEY, 541); + resultBuilder.start(537).stop(540).alleles(CollectionUtil.makeList(AAAARef, AAA)).attribute(VCFConstants.END_KEY, 540); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); + resultBuilder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), null}); + builder.rmAttribute(VCFConstants.END_KEY); + resultBuilder.rmAttribute(VCFConstants.END_KEY); // near start of second interval snp - builder.source("test6"); + builder.source("test6_withEND"); start = 541; offset = 5; - builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)); - result_builder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); + builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start);; + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start+offset); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); + builder.rmAttribute(VCFConstants.END_KEY); + resultBuilder.rmAttribute(VCFConstants.END_KEY); // near start of second interval indel builder.source("test7"); builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)); - result_builder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // near end of second interval snp builder.source("test8"); start = 1040; offset = 5; builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)); - result_builder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // near end of second interval indel builder.source("test9"); start = 1037; stop = 1040; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(AAAARef, AAA)); - result_builder.start(start + offset).stop(stop + offset).alleles(CollectionUtil.makeList(AAAARef, AAA)); + resultBuilder.start(start + offset).stop(stop + offset).alleles(CollectionUtil.makeList(AAAARef, AAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // straddling interval indel builder.source("test9"); @@ -1091,9 +1118,9 @@ public Iterator indelNoFlipData() { stop = 1041; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(AAAARef, AAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); + resultBuilder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), null}); // straddling interval indel @@ -1102,9 +1129,9 @@ public Iterator indelNoFlipData() { stop = 1048; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(AAAARef, AAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); + resultBuilder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), null}); // vanishing snp @@ -1113,9 +1140,9 @@ public Iterator indelNoFlipData() { stop = 1045; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(ARef, T)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); + resultBuilder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), null}); // after second interval indel @@ -1124,23 +1151,23 @@ public Iterator indelNoFlipData() { stop = 1049; offset = 0; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(AAAARef, AAA)); - result_builder.start(start + offset).stop(stop + offset).alleles(CollectionUtil.makeList(AAAARef, AAA)); + resultBuilder.start(start + offset).stop(stop + offset).alleles(CollectionUtil.makeList(AAAARef, AAA)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); // near start of second interval snp builder.source("test13"); start = 1046; builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)); - result_builder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)); genotypeBuilder.alleles(builder.getAlleles()); - resultGenotypeBuilder.alleles(result_builder.getAlleles()); + resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); - result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), result_builder.make()}); + resultBuilder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{liftOver, twoIntervalChainReference, builder.make(), resultBuilder.make()}); return tests.iterator(); } @@ -1148,12 +1175,22 @@ public Iterator indelNoFlipData() { @Test(dataProvider = "indelNoFlipData") public void testLiftOverSimpleIndels(final LiftOver liftOver, final ReferenceSequence reference, final VariantContext source, final VariantContext result) { - final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), .95); + final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1); VariantContextBuilder vcb = LiftoverUtils.liftSimpleVariantContext(source, target); VcfTestUtils.assertEquals(vcb == null ? null : vcb.make(), result); } + @Test(dataProvider = "indelNoFlipData") + public void testLiftOverIndels(final LiftOver liftOver, final ReferenceSequence reference, final VariantContext source, final VariantContext result) { + + final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1); + + VariantContext vc = LiftoverUtils.liftVariant(source, target,reference,false,false); + VcfTestUtils.assertEquals(vc, result); + } + + @DataProvider(name = "noCallAndSymbolicData") public Iterator noCallAndSymbolicData() { From b9c680235e6fc20682185be5b2e41d8edc3b07b5 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Sat, 15 Feb 2020 23:06:19 +0200 Subject: [PATCH 2/8] - typos --- .../java/picard/util/LiftoverVcfTest.java | 65 +++++++++++++------ 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index 63d118db0f..e39c907c63 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -33,6 +33,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Optional; /** * Test class for LiftoverVcf. @@ -288,22 +289,19 @@ public void testVcfSorted(final boolean disableSort) { Assert.assertEquals(runPicardCommandLine(args), 0); //check input/header try (final VCFFileReader inputReader = new VCFFileReader(input, false)) { - final VCFHeader header = inputReader.getFileHeader(); - Assert.assertNull(header.getOtherHeaderLine("reference")); - } + final VCFHeader header = inputReader.getFileHeader(); + Assert.assertNull(header.getOtherHeaderLine("reference")); + } //try to open with / without index try (final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, !disableSort)) { - final VCFHeader header = liftReader.getFileHeader(); - Assert.assertNotNull(header.getOtherHeaderLine("reference")); - try (final CloseableIterator iter = liftReader.iterator()) { + final VCFHeader header = liftReader.getFileHeader(); + Assert.assertNotNull(header.getOtherHeaderLine("reference")); + try (final CloseableIterator iter = liftReader.iterator()) { Assert.assertEquals(iter.stream().count(), 5L); - } } } - + } - - @DataProvider Iterator testWriteVcfData() { @@ -373,7 +371,6 @@ public void testWriteVcfWithFlippedAlleles( VcfTestUtils.assertVcfFilesAreEqual(rejectOutputFile, expectedRejectVcf); } - @DataProvider(name = "indelFlipData") public Iterator indelFlipData() { @@ -624,7 +621,7 @@ public Iterator indelFlipData() { builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{builder.make(), REFERENCE, result_builder.make()}); - + builder.noGenotypes(); result_builder.noGenotypes(); @@ -1011,8 +1008,8 @@ public Iterator indelNoFlipData() { // trivial case indel builder.source("testWithEND"); - builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY,(int)stop); - resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY,(int)4); + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, (int) stop); + resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, (int) 4); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); @@ -1068,8 +1065,9 @@ public Iterator indelNoFlipData() { builder.source("test6_withEND"); start = 541; offset = 5; - builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start);; - resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start+offset); + builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start); + ; + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start + offset); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); @@ -1172,12 +1170,39 @@ public Iterator indelNoFlipData() { return tests.iterator(); } - @Test(dataProvider = "indelNoFlipData") - public void testLiftOverSimpleIndels(final LiftOver liftOver, final ReferenceSequence reference, final VariantContext source, final VariantContext result) { + @DataProvider + public Iterator cleanIndelNoFlipData(){ + Iterator sourceIterator = indelNoFlipData(); + + return new Iterator() { + @Override + public boolean hasNext() { + return sourceIterator.hasNext(); + } + + @Override + public Object[] next() { + final Object[] next = sourceIterator.next(); + final LiftOver liftOver = (LiftOver) next[0]; + final VariantContext source = (VariantContext) next[2]; + final VariantContext result = (VariantContext) next[3]; + return new Object[]{ + liftOver, + Optional.ofNullable(source).map(v -> new VariantContextBuilder(v).rmAttribute(VCFConstants.END_KEY).make()).orElse(null), + Optional.ofNullable(result).map(v -> new VariantContextBuilder(v).rmAttribute(VCFConstants.END_KEY).make()).orElse(null) + }; + } + }; + } + + @Test(dataProvider = "cleanIndelNoFlipData") + public void testLiftOverSimpleIndels(final LiftOver liftOver, final VariantContext source, final VariantContext result) { final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1); - VariantContextBuilder vcb = LiftoverUtils.liftSimpleVariantContext(source, target); + final VariantContextBuilder vcb = LiftoverUtils.liftSimpleVariantContext(source, target); + // liftSimpleVariantContext doesn't take care of end attributes... + VcfTestUtils.assertEquals(vcb == null ? null : vcb.make(), result); } @@ -1186,7 +1211,7 @@ public void testLiftOverIndels(final LiftOver liftOver, final ReferenceSequence final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1); - VariantContext vc = LiftoverUtils.liftVariant(source, target,reference,false,false); + final VariantContext vc = LiftoverUtils.liftVariant(source, target,reference,false,false); VcfTestUtils.assertEquals(vc, result); } From 2366b461d715a3ac4e2766b13a39fe29f2862c97 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 18 Feb 2020 22:27:02 +0200 Subject: [PATCH 3/8] - making Symbolic Alleles great again! --- src/main/java/picard/util/LiftoverUtils.java | 10 +++++- .../java/picard/util/LiftoverUtilsTest.java | 6 +++- .../java/picard/util/LiftoverVcfTest.java | 36 ++++++++++++------- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index 253b6ab884..390eeeca3a 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -364,7 +364,9 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina // Put each allele into the alleleBasesMap unless it is a spanning deletion. // Spanning deletions are dealt with as a special case later in fixedAlleleMap. - alleles.stream().filter(a->!a.equals(Allele.SPAN_DEL)).forEach(a -> alleleBasesMap.put(a, a.getBases())); + alleles.stream() + .filter(a->!a.equals(Allele.SPAN_DEL)&&!a.isSymbolic()) + .forEach(a -> alleleBasesMap.put(a, a.getBases())); int theStart = start; int theEnd = end; @@ -434,6 +436,12 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina // a special case here. fixedAlleleMap.put(Allele.SPAN_DEL, Allele.SPAN_DEL); + // symbolic alleles cannot be left-aligned so they need to be put + // into the map directly + alleles.stream() + .filter(Allele::isSymbolic) + .forEach(a -> fixedAlleleMap.put(a, a)); + //retain original order: List fixedAlleles = alleles.stream() .map(fixedAlleleMap::get) diff --git a/src/test/java/picard/util/LiftoverUtilsTest.java b/src/test/java/picard/util/LiftoverUtilsTest.java index 95084befbc..3c9d50ce5a 100644 --- a/src/test/java/picard/util/LiftoverUtilsTest.java +++ b/src/test/java/picard/util/LiftoverUtilsTest.java @@ -8,7 +8,11 @@ import org.testng.annotations.Test; import picard.vcf.VcfTestUtils; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; import java.util.stream.Collectors; /** diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index e39c907c63..62b3718dc6 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -768,6 +768,9 @@ public Iterator leftAlignAllelesData() { final Allele T = Allele.create("T", false); + final Allele nonRef1 = Allele.create("<*>", false); + final Allele nonRef2 = Allele.create("", false); + final List tests = new ArrayList<>(); final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); @@ -803,6 +806,14 @@ public Iterator leftAlignAllelesData() { result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{builder.make(), REFERENCE, result_builder.make()}); + builder.alleles(CollectionUtil.makeList(RefG, A, nonRef1)); + result_builder.alleles(builder.getAlleles()); + resultGenotypeBuilder.alleles(CollectionUtil.makeList(RefG, A)); + genotypeBuilder.alleles(CollectionUtil.makeList(RefG, A)); + builder.genotypes(genotypeBuilder.make()); + result_builder.genotypes(resultGenotypeBuilder.make()); + tests.add(new Object[]{builder.make(), REFERENCE, result_builder.make()}); + for (start = 1; start <= REFERENCE.getBases().length; start++) { builder.source("test2-" + start); builder.start(start).stop(start); @@ -1171,7 +1182,7 @@ public Iterator indelNoFlipData() { } @DataProvider - public Iterator cleanIndelNoFlipData(){ + public Iterator cleanIndelNoFlipData() { Iterator sourceIterator = indelNoFlipData(); return new Iterator() { @@ -1211,16 +1222,15 @@ public void testLiftOverIndels(final LiftOver liftOver, final ReferenceSequence final Interval target = liftOver.liftOver(new Interval(source.getContig(), source.getStart(), source.getEnd()), 1); - final VariantContext vc = LiftoverUtils.liftVariant(source, target,reference,false,false); + final VariantContext vc = LiftoverUtils.liftVariant(source, target, reference, false, false); VcfTestUtils.assertEquals(vc, result); } - @DataProvider(name = "noCallAndSymbolicData") public Iterator noCallAndSymbolicData() { - final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE",1).attribute("TEST_ATTRIBUTE2","Hi").attribute("TEST_ATTRIBUTE3",new double[] {1.2,2.1}); - final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE",1).attribute("TEST_ATTRIBUTE2","Hi").attribute("TEST_ATTRIBUTE3",new double[] {1.2,2.1}); + final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1}); + final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1}); result_builder.attribute(LiftoverVcf.ORIGINAL_CONTIG, "chr1"); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1"); final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1"); @@ -1231,14 +1241,15 @@ public Iterator noCallAndSymbolicData() { final Allele T = Allele.create("T", false); final Allele A = Allele.create("A", false); final Allele DEL = Allele.create("*", false); + final Allele nonRef = Allele.create("<*>", false); final LiftOver liftOver = new LiftOver(TWO_INTERVAL_CHAIN_FILE); final LiftOver liftOverRC = new LiftOver(CHAIN_FILE); builder.source("test1"); int start = 10; - builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T)); - result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T)); + builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); + result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); genotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create("."))); resultGenotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create("."))); builder.genotypes(genotypeBuilder.make()); @@ -1263,13 +1274,12 @@ public Iterator noCallAndSymbolicData() { int offset = 3; start = CHAIN_SIZE - offset; int liftedStart = 1 + offset; - builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL)); - result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, DEL)); + builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL, nonRef)); + result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, DEL, nonRef)); result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles())); result_builder.attribute(LiftoverUtils.REV_COMPED_ALLELES, true); - genotypeBuilder.alleles(CollectionUtil.makeList(T, DEL)); resultGenotypeBuilder.alleles(CollectionUtil.makeList(A, DEL)); builder.genotypes(genotypeBuilder.make()); @@ -1281,8 +1291,8 @@ public Iterator noCallAndSymbolicData() { offset = 4; start = CHAIN_SIZE - offset; liftedStart = 1 + offset; - builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T)); - result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A)); + builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); + result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, nonRef)); result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles())); genotypeBuilder.alleles(CollectionUtil.makeList(T, Allele.NO_CALL)); @@ -1313,7 +1323,7 @@ public void testLiftOverNoCallAndSymbolic(final LiftOver liftOver, final Variant source.getAlleles().forEach(a -> resultAlleles.add(a.getDisplayString())); Assert.assertEquals(resultAlleles, result.getAttributeAsStringList(LiftoverVcf.ORIGINAL_ALLELES, null)); } - result.getAttributes().entrySet().stream().filter(e->e.getKey().matches("TEST_ATTRIBUTE.*")).forEach(e -> { + result.getAttributes().entrySet().stream().filter(e -> e.getKey().matches("TEST_ATTRIBUTE.*")).forEach(e -> { Assert.assertTrue(vc.hasAttribute(e.getKey())); Assert.assertEquals(vc.getAttribute(e.getKey()), e.getValue()); From f93689dd5e1cdb05a0f3d6e9207fb88d16f2ef4b Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 19 Feb 2020 14:46:55 +0200 Subject: [PATCH 4/8] - WIP. problems with tests.... --- src/main/java/picard/util/LiftoverUtils.java | 39 +++++++++--- src/main/java/picard/vcf/LiftoverVcf.java | 63 ++++++++++++------- .../java/picard/util/LiftoverVcfTest.java | 6 +- .../LiftOver/testLiftoverBiallelicIndels.vcf | 4 +- 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index 390eeeca3a..584a1013ad 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -29,12 +29,22 @@ import htsjdk.samtools.util.Log; import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; -import htsjdk.variant.variantcontext.*; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFConstants; import org.apache.commons.lang3.ArrayUtils; import picard.vcf.LiftoverVcf; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; public class LiftoverUtils { @@ -159,17 +169,20 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var // When reverse complementing, will be necessary to include this additional base. // This check prevents the extremely rare situation in which the indel occurs on the // first base of the sequence - final boolean addToStart = source.isIndel() && target.getStart() > 1; + + final boolean indelForLiftover = isIndelForLiftover(source); + final boolean addToStart = indelForLiftover && target.getStart() > 1; final int start = target.getStart() - (addToStart ? 1 : 0); vcb.start(start); - final int stop = target.getEnd() - (addToStart ? 1 : 0); + final int stop = target.getEnd() + (addToStart ? 0 : 1); vcb.stop(stop); - vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, source.isIndel(), addToStart)); + vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, indelForLiftover, addToStart)); + - if (!source.isSNP()) { + if (indelForLiftover) { // check that the reverse complemented bases match the new reference if (!referenceAlleleMatchesReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) { return null; @@ -182,6 +195,16 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var return vcb; } + private static boolean isIndelForLiftover(final VariantContext vc){ + final Allele ref = vc.getReference(); + if (ref.length() != 1) { + return true; + } + + return vc.getAlleles().stream().filter(a -> !a.isSymbolic()).filter(a -> !a.equals(Allele.SPAN_DEL)). + anyMatch(a -> a.length() != 1); + } + private static List reverseComplementAlleles(final List originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isIndel, final boolean addToStart) { final List alleles = new ArrayList<>(); @@ -203,9 +226,9 @@ private static Allele reverseComplement(final Allele oldAllele, final Interval t if (addToStart) { alleleBuilder.append((char) referenceSequence.getBases()[target.getStart() - 2]); } - alleleBuilder.append(SequenceUtil.reverseComplement(oldAllele.getBaseString().substring(1, oldAllele.length()))); + alleleBuilder.append(SequenceUtil.reverseComplement(oldAllele.getBaseString().substring(0, oldAllele.length()))); if (!addToStart) { - alleleBuilder.append((char) referenceSequence.getBases()[target.getEnd() - 1]); + alleleBuilder.append((char) referenceSequence.getBases()[target.getEnd()]); } return Allele.create(alleleBuilder.toString(), oldAllele.isReference()); diff --git a/src/main/java/picard/vcf/LiftoverVcf.java b/src/main/java/picard/vcf/LiftoverVcf.java index 490b07e30d..dbfcd306e0 100644 --- a/src/main/java/picard/vcf/LiftoverVcf.java +++ b/src/main/java/picard/vcf/LiftoverVcf.java @@ -30,14 +30,28 @@ import htsjdk.samtools.liftover.LiftOver; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFileWalker; -import htsjdk.samtools.util.*; +import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.CollectionUtil; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Interval; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.ProgressLogger; +import htsjdk.samtools.util.SortingCollection; +import htsjdk.samtools.util.StringUtil; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; -import htsjdk.variant.vcf.*; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLineCount; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import htsjdk.variant.vcf.VCFRecordCodec; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; @@ -50,7 +64,15 @@ import java.io.File; import java.text.DecimalFormat; import java.text.NumberFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.stream.Collectors; /** @@ -530,29 +552,26 @@ private void tryToAddVariant(final VariantContext vc, final ReferenceSequence re } // Check that the reference allele still agrees with the reference sequence - boolean mismatchesReference = false; - for (final Allele allele : vc.getAlleles()) { - if (allele.isReference()) { - final byte[] ref = refSeq.getBases(); - final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1); - - if (!refString.equalsIgnoreCase(allele.getBaseString())) { - // consider that the ref and the alt may have been swapped in a simple biallelic SNP - if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) { - if (RECOVER_SWAPPED_REF_ALT) { - totalTrackedAsSwapRefAlt++; - addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source); - return; - } else { - totalTrackedAsSwapRefAlt++; - } - } - mismatchesReference = true; + final boolean mismatchesReference; + final Allele allele = vc.getReference(); + final byte[] ref = refSeq.getBases(); + final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1); + + if (!refString.equalsIgnoreCase(allele.getBaseString())) { + // consider that the ref and the alt may have been swapped in a simple biallelic SNP + if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) { + totalTrackedAsSwapRefAlt++; + if (RECOVER_SWAPPED_REF_ALT) { + addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source); + return; } - break; } + mismatchesReference = true; + } else { + mismatchesReference = false; } + if (mismatchesReference) { rejectedRecords.add(new VariantContextBuilder(source) .filter(FILTER_MISMATCHING_REF_ALLELE) diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index 62b3718dc6..3ef13473b2 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -382,6 +382,7 @@ public Iterator indelFlipData() { final Allele RefCG = Allele.create("CG", true); final Allele RefT = Allele.create("T", true); final Allele RefA = Allele.create("A", true); + final Allele RefAC = Allele.create("AC", true); final Allele RefC = Allele.create("C", true); final Allele RefG = Allele.create("G", true); @@ -389,6 +390,7 @@ public Iterator indelFlipData() { final Allele T = Allele.create("T", false); final Allele C = Allele.create("C", false); final Allele CG = Allele.create("CG", false); + final Allele CGT = Allele.create("CGT", false); final Allele GA = Allele.create("GA", false); final Allele TC = Allele.create("TC", false); final Allele CAA = Allele.create("CAA", false); @@ -500,12 +502,12 @@ public Iterator indelFlipData() { tests.add(new Object[]{builder.make(), REFERENCE, null}); // MNP - // GTT*(T)/ACGT(T) -> AAA(C)*/AACG(T) -> which is then normalized to A*/CG at position 11 + // GTT*(T)/ACGT(T) -> AAA(C)*/AACG(T) -> which is then normalized to AC*/CGT at position 11 // pos 11 ^ in the result start = CHAIN_SIZE - 11; stop = start + 2; builder.source("test7").stop(stop).start(start).alleles(CollectionUtil.makeList(RefGTT, ACGT)); - result_builder.start(11).stop(11).alleles(CollectionUtil.makeList(RefA, CG)); + result_builder.start(11).stop(12).alleles(CollectionUtil.makeList(RefAC, CGT)); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(result_builder.getAlleles()); builder.genotypes(genotypeBuilder.make()); diff --git a/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf b/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf index 5671f1c98e..7977b94dd4 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf @@ -2,8 +2,8 @@ ##FORMAT= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 1 . C CCCCT 15676.17 PASS END=1 GT 0/0 -chr1 61 . GT G 724.43 PASS END=62 GT 0/1 +chr1 1 . T TCCCC 15676.17 PASS END=1 GT 0/0 +chr1 60 . GT G 724.43 PASS END=61 GT 0/1 chr1 72 . T A 100 PASS END=72 GT 0/1 chr1 72 . T A 100 PASS END=72 GT ./. chr1 72 . TT A 100 PASS END=73 GT 1/. \ No newline at end of file From 828705bdd73486698c7540d441db0db5dd59f8e2 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 19 Feb 2020 22:33:10 +0200 Subject: [PATCH 5/8] - WIP. problems with tests.... --- src/main/java/picard/util/LiftoverUtils.java | 2 +- .../java/picard/util/LiftoverVcfTest.java | 21 ++++++++++--------- .../picard/vcf/LiftOver/dummy.reference.fasta | 4 ++-- .../LiftOver/testLiftoverBiallelicIndels.vcf | 2 +- .../LiftOver/testLiftoverMismatchingSnps.vcf | 8 +++---- .../testLiftoverMultiallelicIndels.vcf | 4 ++-- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index 584a1013ad..073dd26456 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -176,7 +176,7 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var final int start = target.getStart() - (addToStart ? 1 : 0); vcb.start(start); - final int stop = target.getEnd() + (addToStart ? 0 : 1); + final int stop = target.getEnd() + (addToStart || !indelForLiftover ? 0 : 1); vcb.stop(stop); vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, indelForLiftover, addToStart)); diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index 3ef13473b2..5ef5828121 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -72,10 +72,11 @@ public void teardown() { @DataProvider(name = "liftoverReverseStrand") public Object[][] liftoverReverseStrand() { return new Object[][]{ - {"testLiftoverBiallelicIndels.vcf", 5, 0}, - {"testLiftoverMultiallelicIndels.vcf", 2, 0}, - {"testLiftoverFailingVariants.vcf", 3, 0}, - {"testLiftoverMixedVariants.vcf", 4, 0}, +// {"testLiftoverBiallelicIndels.vcf", 5, 0}, +// {"testLiftoverMultiallelicIndels.vcf", 2, 0}, +// {"testLiftoverFailingVariants.vcf", 3, 0}, +// {"testLiftoverMixedVariants.vcf", 4, 0}, + {"testLiftoverMismatchingSnps.vcf", 3, 1}, }; } @@ -99,11 +100,13 @@ public void testReverseComplementedIndels(final String filename, final int expec }; Assert.assertEquals(runPicardCommandLine(args), 0); - final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false); - Assert.assertEquals(liftReader.iterator().stream().count(), expectedPassing, "The wrong number of variants were lifted over."); + try (VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) { + Assert.assertEquals(liftReader.iterator().stream().count(), expectedPassing, "The wrong number of variants were lifted over."); + } - final VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false); - Assert.assertEquals(rejectReader.iterator().stream().count(), expectedFailing, "The wrong number of variants were rejected."); + try (VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false)) { + Assert.assertEquals(rejectReader.iterator().stream().count(), expectedFailing, "The wrong number of variants were rejected."); + } } @Test @@ -313,7 +316,6 @@ Iterator testWriteVcfData() { tests.add(new Object[]{input, expectedVcf, expectedRejectVcf, TWO_INTERVALS_REFERENCE_FILE, TWO_INTERVAL_CHAIN_FILE}); } - { final File input = new File(TEST_DATA_PATH, "testLiftoverMismatchingSnps.vcf"); final File expectedVcf = new File(TEST_DATA_PATH, "vcfWithFlippedAllelesNegativeChain.lift.vcf"); @@ -363,7 +365,6 @@ public void testWriteVcfWithFlippedAlleles( for (final VariantContext vc : liftReader) { Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_CONTIG)); Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_START)); - } } diff --git a/testdata/picard/vcf/LiftOver/dummy.reference.fasta b/testdata/picard/vcf/LiftOver/dummy.reference.fasta index 176e1532f8..18b6f4c1d1 100644 --- a/testdata/picard/vcf/LiftOver/dummy.reference.fasta +++ b/testdata/picard/vcf/LiftOver/dummy.reference.fasta @@ -6,5 +6,5 @@ CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGA +CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG diff --git a/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf b/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf index 7977b94dd4..a6a6061910 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverBiallelicIndels.vcf @@ -2,7 +2,7 @@ ##FORMAT= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 1 . T TCCCC 15676.17 PASS END=1 GT 0/0 +chr1 1 . C CCCCC 15676.17 PASS END=1 GT 0/0 chr1 60 . GT G 724.43 PASS END=61 GT 0/1 chr1 72 . T A 100 PASS END=72 GT 0/1 chr1 72 . T A 100 PASS END=72 GT ./. diff --git a/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf b/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf index 3d5414922b..852e79a2f1 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf @@ -4,8 +4,8 @@ ##FORMAT= ##FORMAT= ##INFO= +##chr1 72 chr1_72_C_A C A 100 PASS . GT:AD:PL 1/1:10,11:50,0,60 +##chr1 1 chr1_1_C_CCCCT C CCCCT 15676.17 PASS . GT 0/0 +##chr1 73 chr1_73_G_T T G 100 PASS . GT:AD:PL 0/0:10,11:50,0,60 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 1 . C CCCCT 15676.17 PASS . GT 0/0 -chr1 61 . GT G 724.43 PASS . GT 0/1 -chr1 72 chr1_72_C_A C A 100 PASS . GT:AD:PL 1/1:10,11:50,0,60 -chr1 73 chr1_73_G_T G T 100 PASS . GT:AD:PL 0/0:10,11:50,0,60 +chr1 61 chr1_61_GT_G GT G 724.43 PASS . GT 0/1 diff --git a/testdata/picard/vcf/LiftOver/testLiftoverMultiallelicIndels.vcf b/testdata/picard/vcf/LiftOver/testLiftoverMultiallelicIndels.vcf index acc72f1765..2e8383a719 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverMultiallelicIndels.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverMultiallelicIndels.vcf @@ -1,4 +1,4 @@ ##fileformat=VCFv4.1 #CHROM POS ID REF ALT QUAL FILTER INFO -chr1 1 . C CCCCT,CCCt 15676.17 PASS . -chr1 61 . CT C,CTA 724.43 PASS . +chr1 1 . C CCCCT,CCCT 15676.17 PASS . +chr1 61 . TC T,TCTA 724.43 PASS . From 34d676561eaefa65fa35902db93436982693347d Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Feb 2020 00:42:29 +0200 Subject: [PATCH 6/8] - Greatly simplified liftover logic for indels. - Symbolic alleles are now lifted over as well, by simply carrying them along. --- src/main/java/picard/util/LiftoverUtils.java | 58 ++++++------------- .../java/picard/util/LiftoverVcfTest.java | 44 ++++---------- .../LiftOver/testLiftoverFailingVariants.vcf | 2 +- .../LiftOver/testLiftoverMismatchingSnps.vcf | 8 +-- .../LiftOver/vcfWithFlippedAlleles.lift.vcf | 2 +- .../LiftOver/vcfWithFlippedAlleles.reject.vcf | 4 +- ...cfWithFlippedAllelesNegativeChain.lift.vcf | 6 +- .../picard/vcf/LiftOver/vcfWithMixed.lift.vcf | 2 +- 8 files changed, 42 insertions(+), 84 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index 073dd26456..bac4e40e77 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -42,6 +42,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -58,8 +59,8 @@ public class LiftoverUtils { /** * Default list of attributes that need to be reversed or dropped from the INFO field when alleles have been swapped. */ - public static final Collection DEFAULT_TAGS_TO_REVERSE = Arrays.asList("AF"); - public static final Collection DEFAULT_TAGS_TO_DROP = Arrays.asList("MAX_AF"); + public static final Collection DEFAULT_TAGS_TO_REVERSE = Collections.singletonList("AF"); + public static final Collection DEFAULT_TAGS_TO_DROP = Collections.singletonList("MAX_AF"); public static final Log log = Log.getInstance(LiftoverUtils.class); @@ -165,26 +166,20 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var vcb.rmAttribute(VCFConstants.END_KEY); vcb.chr(target.getContig()); - // By convention, indels are left aligned and include the base prior to that indel. - // When reverse complementing, will be necessary to include this additional base. - // This check prevents the extremely rare situation in which the indel occurs on the - // first base of the sequence - final boolean indelForLiftover = isIndelForLiftover(source); - final boolean addToStart = indelForLiftover && target.getStart() > 1; - final int start = target.getStart() - (addToStart ? 1 : 0); + final int start = target.getStart() ;//- (addToStart ? 1 : 0); vcb.start(start); - final int stop = target.getEnd() + (addToStart || !indelForLiftover ? 0 : 1); + final int stop = target.getEnd();// + (addToStart || !indelForLiftover ? 0 : 1); vcb.stop(stop); - vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, indelForLiftover, addToStart)); + vcb.alleles(reverseComplementAlleles(origAlleles)); if (indelForLiftover) { // check that the reverse complemented bases match the new reference - if (!referenceAlleleMatchesReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) { + if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) { return null; } leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq); @@ -205,33 +200,20 @@ private static boolean isIndelForLiftover(final VariantContext vc){ anyMatch(a -> a.length() != 1); } - private static List reverseComplementAlleles(final List originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isIndel, final boolean addToStart) { + private static List reverseComplementAlleles(final List originalAlleles) { final List alleles = new ArrayList<>(); for (final Allele oldAllele : originalAlleles) { - alleles.add(LiftoverUtils.reverseComplement(oldAllele, target, refSeq, isIndel, addToStart)); + alleles.add(LiftoverUtils.reverseComplement(oldAllele)); } return alleles; } - private static Allele reverseComplement(final Allele oldAllele, final Interval target, final ReferenceSequence referenceSequence, final boolean isIndel, final boolean addToStart) { + private static Allele reverseComplement(final Allele oldAllele) { if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) { return oldAllele; - } else if (isIndel) { - // target.getStart is 1-based, reference bases are 0-based - final StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1); - - if (addToStart) { - alleleBuilder.append((char) referenceSequence.getBases()[target.getStart() - 2]); - } - alleleBuilder.append(SequenceUtil.reverseComplement(oldAllele.getBaseString().substring(0, oldAllele.length()))); - if (!addToStart) { - alleleBuilder.append((char) referenceSequence.getBases()[target.getEnd()]); - } - - return Allele.create(alleleBuilder.toString(), oldAllele.isReference()); } else { return Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference()); } @@ -357,13 +339,13 @@ public static VariantContext swapRefAlt(final VariantContext vc, final Collectio * @param end the end position of the actual indel * @return true if they match, false otherwise */ - protected static boolean referenceAlleleMatchesReferenceForIndel(final List alleles, - final ReferenceSequence referenceSequence, - final int start, - final int end) { + private static boolean referenceAlleleDiffersFromReferenceForIndel(final List alleles, + final ReferenceSequence referenceSequence, + final int start, + final int end) { final String refString = StringUtil.bytesToString(referenceSequence.getBases(), start - 1, end - start + 1); final Allele refAllele = alleles.stream().filter(Allele::isReference).findAny().orElseThrow(() -> new IllegalStateException("Error: no reference allele was present")); - return (refString.equalsIgnoreCase(refAllele.getBaseString())); + return (!refString.equalsIgnoreCase(refAllele.getBaseString())); } /** @@ -377,7 +359,7 @@ protected static boolean referenceAlleleMatchesReferenceForIndel(final List alleles, final ReferenceSequence referenceSequence) { // make sure that referenceAllele matches reference - if (!referenceAlleleMatchesReferenceForIndel(alleles, referenceSequence, start, end)) { + if (referenceAlleleDiffersFromReferenceForIndel(alleles, referenceSequence, start, end)) { throw new IllegalArgumentException(String.format("Reference allele doesn't match reference at %s:%d-%d", referenceSequence.getName(), start, end)); } @@ -403,9 +385,7 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina .collect(Collectors.groupingBy(a -> a[a.length - 1], Collectors.toSet())) .size() == 1 && theEnd > 1) { // 3. truncate rightmost nucleotide of each allele - for (final Allele allele : alleleBasesMap.keySet()) { - alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), true)); - } + alleleBasesMap.replaceAll((a, v) -> truncateBase(alleleBasesMap.get(a), true)); changesInAlleles = true; theEnd--; // 4. end if @@ -442,9 +422,7 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina ) { //9. truncate the leftmost base of the alleles - for (final Allele allele : alleleBasesMap.keySet()) { - alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), false)); - } + alleleBasesMap.replaceAll((a, v) -> truncateBase(alleleBasesMap.get(a), false)); theStart++; } diff --git a/src/test/java/picard/util/LiftoverVcfTest.java b/src/test/java/picard/util/LiftoverVcfTest.java index 5ef5828121..33952c4871 100644 --- a/src/test/java/picard/util/LiftoverVcfTest.java +++ b/src/test/java/picard/util/LiftoverVcfTest.java @@ -44,7 +44,6 @@ public class LiftoverVcfTest extends CommandLineProgramTest { private static final File TEST_DATA_PATH = new File("testdata/picard/vcf/LiftOver"); private static final File CHAIN_FILE = new File(TEST_DATA_PATH, "test.over.chain"); - private static final File POSITIVE_CHAIN_FILE = new File(TEST_DATA_PATH, "test.positive.over.chain"); private static final File TWO_INTERVAL_CHAIN_FILE = new File(TEST_DATA_PATH, "test.two.block.over.chain"); private static final File CHAIN_FILE_WITH_BAD_CONTIG = new File(TEST_DATA_PATH, "test.over.badContig.chain"); @@ -72,10 +71,10 @@ public void teardown() { @DataProvider(name = "liftoverReverseStrand") public Object[][] liftoverReverseStrand() { return new Object[][]{ -// {"testLiftoverBiallelicIndels.vcf", 5, 0}, -// {"testLiftoverMultiallelicIndels.vcf", 2, 0}, -// {"testLiftoverFailingVariants.vcf", 3, 0}, -// {"testLiftoverMixedVariants.vcf", 4, 0}, + {"testLiftoverBiallelicIndels.vcf", 5, 0}, + {"testLiftoverMultiallelicIndels.vcf", 2, 0}, + {"testLiftoverFailingVariants.vcf", 3, 0}, + {"testLiftoverMixedVariants.vcf", 4, 0}, {"testLiftoverMismatchingSnps.vcf", 3, 1}, }; } @@ -131,9 +130,7 @@ public void testChangingInfoFields() { Assert.assertEquals(runPicardCommandLine(args), 0); final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false); - final CloseableIterator iterator = liftReader.iterator(); - while (iterator.hasNext()) { - final VariantContext vc = iterator.next(); + for (final VariantContext vc : liftReader) { Assert.assertTrue(vc.hasAttribute("AF")); Assert.assertEquals(vc.hasAttribute(LiftoverUtils.SWAPPED_ALLELES), vc.hasAttribute("FLIPPED_AF"), vc.toString()); @@ -145,7 +142,6 @@ public void testChangingInfoFields() { Assert.assertTrue(TestNGUtil.compareDoubleWithAccuracy(af, flippedAf, 0.01), "Error while comparing AF. expected " + flippedAf + " but found " + af); - } } } @@ -172,6 +168,7 @@ public void testChangingTagArguments() { }; // we don't actually care what the results are here -- we just want to make sure that it doesn't fail + Assert.assertEquals(runPicardCommandLine(args), 0); } @Test @@ -390,7 +387,6 @@ public Iterator indelFlipData() { final Allele A = Allele.create("A", false); final Allele T = Allele.create("T", false); final Allele C = Allele.create("C", false); - final Allele CG = Allele.create("CG", false); final Allele CGT = Allele.create("CGT", false); final Allele GA = Allele.create("GA", false); final Allele TC = Allele.create("TC", false); @@ -681,24 +677,14 @@ public void testFlipIndelWithOriginalAlleles(final VariantContext source, final @DataProvider(name = "snpWithChangedRef") public Iterator snpWithChangedRef() { - final ReferenceSequence reference = new ReferenceSequence("chr1", 0, - refString.getBytes()); - // 123456789 123456789 123456789 123 - - final Allele RefT = Allele.create("T", true); final Allele RefA = Allele.create("A", true); final Allele RefC = Allele.create("C", true); - final Allele RefG = Allele.create("G", true); final Allele A = Allele.create("A", false); - final Allele T = Allele.create("T", false); final Allele C = Allele.create("C", false); - final Allele G = Allele.create("G", false); final List tests = new ArrayList<>(); - final int CHAIN_SIZE = 540; // the length of the single chain in POSITIVE_CHAIN_FILE - final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1"); final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1"); @@ -716,7 +702,7 @@ public Iterator snpWithChangedRef() { builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + tests.add(new Object[]{builder.make(), result_builder.make()}); builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefA, C)); result_builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefC, A)); @@ -727,16 +713,12 @@ public Iterator snpWithChangedRef() { builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); - tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + tests.add(new Object[]{builder.make(), result_builder.make()}); return tests.iterator(); } @Test(dataProvider = "snpWithChangedRef") - public void snpWithChangedRef(final VariantContext source, final ReferenceSequence reference, final VariantContext result) { - - final LiftOver liftOver = new LiftOver(POSITIVE_CHAIN_FILE); - final Interval originalLocus = new Interval(source.getContig(), source.getStart(), source.getEnd()); - final Interval target = liftOver.liftOver(originalLocus); + public void snpWithChangedRef(final VariantContext source, final VariantContext result) { final VariantContext flipped = LiftoverUtils.swapRefAlt(source, LiftoverUtils.DEFAULT_TAGS_TO_REVERSE, LiftoverUtils.DEFAULT_TAGS_TO_DROP); @@ -752,7 +734,6 @@ public Iterator leftAlignAllelesData() { final Allele RefA = Allele.create("A", true); final Allele RefC = Allele.create("C", true); final Allele RefAA = Allele.create("AA", true); - final Allele RefAC = Allele.create("AC", true); final Allele RefCA = Allele.create("CA", true); final Allele RefCT = Allele.create("CT", true); final Allele RefTC = Allele.create("TC", true); @@ -772,7 +753,6 @@ public Iterator leftAlignAllelesData() { final Allele T = Allele.create("T", false); final Allele nonRef1 = Allele.create("<*>", false); - final Allele nonRef2 = Allele.create("", false); final List tests = new ArrayList<>(); @@ -1022,8 +1002,8 @@ public Iterator indelNoFlipData() { // trivial case indel builder.source("testWithEND"); - builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, (int) stop); - resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, (int) 4); + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, stop); + resultBuilder.start(1).stop(4).alleles(CollectionUtil.makeList(CAAARef, CAA)).attribute(VCFConstants.END_KEY, 4); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); builder.genotypes(genotypeBuilder.make()); @@ -1080,7 +1060,7 @@ public Iterator indelNoFlipData() { start = 541; offset = 5; builder.start(start).stop(start).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start); - ; + resultBuilder.start(start + offset).stop(start + offset).alleles(CollectionUtil.makeList(ARef, T)).attribute(VCFConstants.END_KEY, start + offset); genotypeBuilder.alleles(builder.getAlleles()); resultGenotypeBuilder.alleles(resultBuilder.getAlleles()); diff --git a/testdata/picard/vcf/LiftOver/testLiftoverFailingVariants.vcf b/testdata/picard/vcf/LiftOver/testLiftoverFailingVariants.vcf index 07b39d91a7..376397dcce 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverFailingVariants.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverFailingVariants.vcf @@ -4,6 +4,6 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO -chr1 1 . C T 100 PASS AF=0.2;MAX_AF=1.0;FLIPPED_AF=0.8 +chr1 1 . T C 100 PASS AF=0.2;MAX_AF=1.0;FLIPPED_AF=0.8 chr1 61 . A T 100 PASS AF=0.0;MAX_AF=0.1;FLIPPED_AF=1.0 chr1 201 . TT T 100 PASS AF=1.0;MAX_AF=0.0 diff --git a/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf b/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf index 852e79a2f1..58a3ed657a 100644 --- a/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf +++ b/testdata/picard/vcf/LiftOver/testLiftoverMismatchingSnps.vcf @@ -4,8 +4,8 @@ ##FORMAT= ##FORMAT= ##INFO= -##chr1 72 chr1_72_C_A C A 100 PASS . GT:AD:PL 1/1:10,11:50,0,60 -##chr1 1 chr1_1_C_CCCCT C CCCCT 15676.17 PASS . GT 0/0 -##chr1 73 chr1_73_G_T T G 100 PASS . GT:AD:PL 0/0:10,11:50,0,60 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 61 chr1_61_GT_G GT G 724.43 PASS . GT 0/1 +chr1 1 chr1_1_C_CCCCT C CCCCT 15676.17 PASS . GT 0/0 +chr1 61 chr1_61_TC_T TC T 724.43 PASS . GT 0/1 +chr1 72 chr1_72_C_A C A 100 PASS . GT:AD:PL 1/1:10,11:50,0,60 +chr1 73 chr1_73_T_G T G 100 PASS . GT:AD:PL 0/0:10,11:50,0,60 diff --git a/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.lift.vcf b/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.lift.vcf index fda2296355..b49411d05e 100644 --- a/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.lift.vcf +++ b/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.lift.vcf @@ -6,5 +6,5 @@ ##INFO= ##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 1 . C CCCCT 15676.17 PASS . GT 0/0 +chr1 1 chr1_1_C_CCCCT C CCCCT 15676.17 PASS . GT 0/0 chr1 72 chr1_72_C_A A C 100 PASS SwappedAlleles GT:AD:PL 0/0:11,10:60,0,50 diff --git a/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.reject.vcf b/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.reject.vcf index 9576828dab..3c77aedde1 100644 --- a/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.reject.vcf +++ b/testdata/picard/vcf/LiftOver/vcfWithFlippedAlleles.reject.vcf @@ -10,5 +10,5 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 61 . GT G 724.43 MismatchedRefAllele AttemptedLocus=chr1:61-62;AttemptedAlleles=GT*->G GT 0/1 -chr1 73 chr1_73_G_T G T 100 MismatchedRefAllele AttemptedLocus=chr1:73-73;AttemptedAlleles=G*->T GT:AD:PL 0/0:10,11:50,0,60 +chr1 61 chr1_61_TC_T TC T 724.43 MismatchedRefAllele AttemptedLocus=chr1:61-62;AttemptedAlleles=TC*->T GT 0/1 +chr1 73 chr1_73_T_G T G 100 MismatchedRefAllele AttemptedLocus=chr1:73-73;AttemptedAlleles=T*->G GT:AD:PL 0/0:10,11:50,0,60 diff --git a/testdata/picard/vcf/LiftOver/vcfWithFlippedAllelesNegativeChain.lift.vcf b/testdata/picard/vcf/LiftOver/vcfWithFlippedAllelesNegativeChain.lift.vcf index 83ae3a2680..e86e6d075e 100644 --- a/testdata/picard/vcf/LiftOver/vcfWithFlippedAllelesNegativeChain.lift.vcf +++ b/testdata/picard/vcf/LiftOver/vcfWithFlippedAllelesNegativeChain.lift.vcf @@ -6,6 +6,6 @@ ##INFO= ##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 -chr1 421 . CA C 724.43 PASS ReverseComplementedAlleles GT 0/1 -chr1 468 chr1_73_G_T A C 100 PASS SwappedAlleles;ReverseComplementedAlleles GT:AD:PL 1/1:11,10:60,0,50 -chr1 539 . A AAGGG 15676.17 PASS ReverseComplementedAlleles GT 0/0 +chr1 468 chr1_73_T_G A C 100 PASS ReverseComplementedAlleles GT:AD:PL 0/0:10,11:50,0,60 +chr1 478 chr1_61_TC_T AG A 724.43 PASS ReverseComplementedAlleles GT 0/1 +chr1 539 chr1_1_C_CCCCT A AAGGG 15676.17 PASS ReverseComplementedAlleles GT 0/0 diff --git a/testdata/picard/vcf/LiftOver/vcfWithMixed.lift.vcf b/testdata/picard/vcf/LiftOver/vcfWithMixed.lift.vcf index 7992e0dd2e..0335f6a00c 100644 --- a/testdata/picard/vcf/LiftOver/vcfWithMixed.lift.vcf +++ b/testdata/picard/vcf/LiftOver/vcfWithMixed.lift.vcf @@ -8,6 +8,6 @@ ##contig= #CHROM POS ID REF ALT QUAL FILTER INFO chr1 361 . C T,AC 912 PASS ReverseComplementedAlleles -chr1 420 . ACA A,ATCA,AT 934 PASS ReverseComplementedAlleles chr1 420 . A C,TA 724.43 PASS ReverseComplementedAlleles +chr1 421 . CAA T,TCAT,TT 934 PASS ReverseComplementedAlleles chr1 481 . C AGGGC,T 15676.17 PASS ReverseComplementedAlleles From 6f81b94e4550d918e454711e4fb9a1d96929ff40 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Feb 2020 00:52:03 +0200 Subject: [PATCH 7/8] - cleanup --- src/main/java/picard/util/LiftoverUtils.java | 43 ++++++++------------ 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index bac4e40e77..1b2a426b99 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -159,25 +159,19 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var if (target.isPositiveStrand()) { throw new IllegalArgumentException("This should only be called for negative strand liftovers"); } + final int start = target.getStart(); + final int stop = target.getEnd(); final List origAlleles = new ArrayList<>(source.getAlleles()); final VariantContextBuilder vcb = new VariantContextBuilder(source); - vcb.rmAttribute(VCFConstants.END_KEY); - vcb.chr(target.getContig()); + vcb.rmAttribute(VCFConstants.END_KEY) + .chr(target.getContig()) + .start(start) + .stop(stop) + .alleles(reverseComplementAlleles(origAlleles)); - final boolean indelForLiftover = isIndelForLiftover(source); - - final int start = target.getStart() ;//- (addToStart ? 1 : 0); - vcb.start(start); - - final int stop = target.getEnd();// + (addToStart || !indelForLiftover ? 0 : 1); - vcb.stop(stop); - - vcb.alleles(reverseComplementAlleles(origAlleles)); - - - if (indelForLiftover) { + if (isIndelForLiftover(source)) { // check that the reverse complemented bases match the new reference if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) { return null; @@ -201,17 +195,13 @@ private static boolean isIndelForLiftover(final VariantContext vc){ } private static List reverseComplementAlleles(final List originalAlleles) { - final List alleles = new ArrayList<>(); - - for (final Allele oldAllele : originalAlleles) { - alleles.add(LiftoverUtils.reverseComplement(oldAllele)); - } - - return alleles; + return originalAlleles + .stream() + .map(LiftoverUtils::reverseComplement) + .collect(Collectors.toList()); } private static Allele reverseComplement(final Allele oldAllele) { - if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) { return oldAllele; } else { @@ -345,7 +335,7 @@ private static boolean referenceAlleleDiffersFromReferenceForIndel(final List new IllegalStateException("Error: no reference allele was present")); - return (!refString.equalsIgnoreCase(refAllele.getBaseString())); + return !refString.equalsIgnoreCase(refAllele.getBaseString()); } /** @@ -452,14 +442,13 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina } private static byte[] truncateBase(final byte[] allele, final boolean truncateRightmost) { - return Arrays.copyOfRange(allele, truncateRightmost ? 0 : 1, truncateRightmost ? - allele.length - 1 : - allele.length); + return Arrays.copyOfRange(allele, + truncateRightmost ? 0 : 1, + truncateRightmost ? allele.length - 1 : allele.length); } //creates a new byte array with the base added at the beginning private static byte[] extendOneBase(final byte[] bases, final byte base) { - final byte[] newBases = new byte[bases.length + 1]; System.arraycopy(bases, 0, newBases, 1, bases.length); From f59b04eed5bb74fe6e6d6f2dfd8429e2eb375be1 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 3 Apr 2020 14:45:47 -0400 Subject: [PATCH 8/8] -whitespaces --- src/main/java/picard/util/LiftoverUtils.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/picard/util/LiftoverUtils.java b/src/main/java/picard/util/LiftoverUtils.java index 1b2a426b99..4dd5defcb9 100644 --- a/src/main/java/picard/util/LiftoverUtils.java +++ b/src/main/java/picard/util/LiftoverUtils.java @@ -99,7 +99,7 @@ public static VariantContext liftVariant(final VariantContext source, final Inte // If any of the source alleles is symbolic, do not populate the END tag (protecting things like from // getting screwed up in reverse-complemented variants if (source.hasAttribute(VCFConstants.END_KEY) && builder.getAlleles().stream().noneMatch(Allele::isSymbolic)) { - builder.attribute(VCFConstants.END_KEY, (int)builder.getStop()); + builder.attribute(VCFConstants.END_KEY, (int) builder.getStop()); } else { builder.rmAttribute(VCFConstants.END_KEY); } @@ -184,14 +184,16 @@ protected static VariantContextBuilder reverseComplementVariantContext(final Var return vcb; } - private static boolean isIndelForLiftover(final VariantContext vc){ + private static boolean isIndelForLiftover(final VariantContext vc) { final Allele ref = vc.getReference(); if (ref.length() != 1) { return true; } - return vc.getAlleles().stream().filter(a -> !a.isSymbolic()).filter(a -> !a.equals(Allele.SPAN_DEL)). - anyMatch(a -> a.length() != 1); + return vc.getAlleles().stream() + .filter(a -> !a.isSymbolic()) + .filter(a -> !a.equals(Allele.SPAN_DEL)) + .anyMatch(a -> a.length() != 1); } private static List reverseComplementAlleles(final List originalAlleles) { @@ -360,7 +362,7 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina // Put each allele into the alleleBasesMap unless it is a spanning deletion. // Spanning deletions are dealt with as a special case later in fixedAlleleMap. alleles.stream() - .filter(a->!a.equals(Allele.SPAN_DEL)&&!a.isSymbolic()) + .filter(a -> !a.equals(Allele.SPAN_DEL) && !a.isSymbolic()) .forEach(a -> alleleBasesMap.put(a, a.getBases())); int theStart = start; @@ -409,7 +411,7 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina alleleBasesMap.values().stream() .collect(Collectors.groupingBy(a -> a[0], Collectors.toSet())) .size() == 1 - ) { + ) { //9. truncate the leftmost base of the alleles alleleBasesMap.replaceAll((a, v) -> truncateBase(alleleBasesMap.get(a), false));