Skip to content

Commit

Permalink
Fixing HGVS conversion for indels (#452)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed May 21, 2019
1 parent 746603e commit 504f082
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 79 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### jannovar-cli

- Using HTTP protocol instead of FTP everywhere as it's possible (#451).
- Fixing HGVS conversion for indels (#452).

## v0.30

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import de.charite.compbio.jannovar.hgvs.parser.HGVSParser;
import de.charite.compbio.jannovar.hgvs.parser.HGVSParsingException;
import de.charite.compbio.jannovar.reference.GenomeVariant;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.vardbs.base.VariantDescription;
import de.charite.compbio.jannovar.vardbs.base.VariantNormalizer;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.variant.variantcontext.Allele;
Expand Down Expand Up @@ -46,6 +49,10 @@ public class ProjectTranscriptToChromosome extends JannovarAnnotationCommand {
* Translation of variants
*/
NucleotideChangeToGenomeVariantTranslator translator;
/**
* Normalization of variants.
*/
VariantNormalizer normalizer;
/**
* Configuration
*/
Expand All @@ -64,6 +71,7 @@ public void run() throws JannovarException {
deserializeTranscriptDefinitionFile(options.getDatabaseFilePath());
System.err.println("Loading FASTA index...");
loadFASTAIndex();
normalizer = new VariantNormalizer(options.getPathReferenceFASTA());
System.err.println("Opening output VCF file...");
try (VariantContextWriter writer = openOutputFile()) {
processFile(writer);
Expand Down Expand Up @@ -184,7 +192,7 @@ private String mapContigToFasta(String contigName) {
// Try to find matching contig in fasta
String nameInFasta = null;
for (SAMSequenceRecord record : fasta.getSequenceDictionary().getSequences()) {
if (jannovarData.getRefDict().getContigNameToID().containsKey(record.getSequenceName())) {
if (contigID.equals(jannovarData.getRefDict().getContigNameToID().get(record.getSequenceName()))) {
nameInFasta = record.getSequenceName();
break;
}
Expand All @@ -196,23 +204,20 @@ private String mapContigToFasta(String contigName) {
}

private void writeVariant(VariantContextWriter writer, GenomeVariant genomeVar) {
String nameInFasta = mapContigToFasta(genomeVar.getChrName());
List<Allele> alleles = new ArrayList<Allele>();
int shift = 0;
if (genomeVar.getRef().isEmpty() || genomeVar.getAlt().isEmpty()) {
shift = -1;
String left = fasta.getSubsequenceAt(nameInFasta, genomeVar.getPos(), genomeVar.getPos())
.getBaseString();
alleles.add(Allele.create(left + genomeVar.getRef(), true));
alleles.add(Allele.create(left + genomeVar.getAlt(), false));
} else {
alleles.add(Allele.create(genomeVar.getRef(), true));
alleles.add(Allele.create(genomeVar.getAlt(), false));
}
genomeVar = genomeVar.withStrand(Strand.FWD);
final String nameInFasta = mapContigToFasta(genomeVar.getChrName());
final VariantDescription desc = normalizer.normalizeInsertion(
new VariantDescription(nameInFasta, genomeVar.getPos(), genomeVar.getRef(), genomeVar.getAlt())
);

final List<Allele> alleles = Lists.newArrayList(
Allele.create(desc.getRef(), true),
Allele.create(desc.getAlt(), false)
);

VariantContextBuilder builder = new VariantContextBuilder();
builder.chr(genomeVar.getChrName()).start(genomeVar.getPos() + shift + 1)
.computeEndFromAlleles(alleles, genomeVar.getPos() + shift + 1).alleles(alleles);
builder.chr(nameInFasta).start(desc.getPos() + 1)
.computeEndFromAlleles(alleles, desc.getPos() + 1).alleles(alleles);

writer.add(builder.make());
}
Expand Down
Loading

0 comments on commit 504f082

Please sign in to comment.