From 18ba007d0e64100be42fd5f039e9596a35839766 Mon Sep 17 00:00:00 2001 From: clintval Date: Sat, 28 Dec 2024 11:35:40 -0500 Subject: [PATCH] Add a class for parsing secondary alignments from XA/XB --- fgpyo/sam/__init__.py | 240 +++++++++++ tests/fgpyo/sam/test_secondary_alignment.py | 444 ++++++++++++++++++++ 2 files changed, 684 insertions(+) create mode 100644 tests/fgpyo/sam/test_secondary_alignment.py diff --git a/fgpyo/sam/__init__.py b/fgpyo/sam/__init__.py index 8fa2c19..9543fe1 100644 --- a/fgpyo/sam/__init__.py +++ b/fgpyo/sam/__init__.py @@ -153,12 +153,39 @@ >>> [str(sup.cigar) for sup in sups] ['50S100M', '75S75M'] ``` + +## Examples of parsing bwa's `XA` and `XB` tags into individual secondary alignments + +```python +>>> from fgpyo.sam import SecondaryAlignment +>>> xa = SecondaryAlignment.from_tag_part("chr9,-104599381,49M,4") +>>> xa.reference_name +'chr9' +>>> xb = SecondaryAlignment.from_tag_part("chr9,-104599381,49M,4,0,30") +>>> xb.reference_name +'chr9' +>>> xb.mapq +30 +>>> xa.cigar == xb.cigar +True +>>> xb_tag = "chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;" +>>> xb1, xb2 = SecondaryAlignment.many_from_tag(xb_tag) +>>> xb1.is_forward +False +>>> xb1.is_forward +True +>>> xb1.mapq, xb2.mapq +('30', '20') +``` + """ import enum import io import sys from collections.abc import Collection +from dataclasses import dataclass +from functools import cached_property from itertools import chain from pathlib import Path from typing import IO @@ -182,6 +209,7 @@ import fgpyo.io from fgpyo.collections import PeekableIterator +from fgpyo.sequence import reverse_complement SamPath = Union[IO[Any], Path, str] """The valid base classes for opening a SAM/BAM/CRAM file.""" @@ -195,6 +223,9 @@ NO_REF_POS: int = -1 """The reference position to use to indicate no position in SAM/BAM.""" +NO_QUERY_BASES: str = "*" +"""The string to use for a SAM record with missing query bases.""" + _IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase) """The classes that should be treated as file-like classes""" @@ -1231,3 +1262,212 @@ class SamOrder(enum.Enum): Coordinate = "coordinate" #: coordinate sorted QueryName = "queryname" #: queryname sorted Unknown = "unknown" # Unknown SAM / BAM / CRAM sort order + + +@dataclass(frozen=True) +class SecondaryAlignment: + """A secondary alignment as encoded in one part of the `XA` or `XB` tag value on a SAM record. + + Format of a single secondary alignment in the `XA` tag (`,`-delimited): + + ```text + chr:,cigar,NM + ``` + + Full example of an `XA` tag value (`;`-delimited): + + ```text + XA:Z:chr9,-104599381,49M,4;chr3,+170653467,49M,4;chr12,+46991828,49M,5; + ``` + + Format of a single secondary alignment in the `XB` tag (`,`-delimited): + + ```text + chr:,cigar,NM,AS,MapQ + ``` + + Full example of an `XB` tag value (`;`-delimited): + + ```text + XB:Z:chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;chr12,+46991828,49M,5,0,10; + ``` + + Args: + reference_name: The reference sequence name. + reference_start: The 0-based start position of the alignment. + is_forward: If the alignment is in the forward orientation or not. + cigar: The Cigar sequence representing the alignment. + edit_distance: The number of mismatches between the query and the target. + alignment_score: The aligner-reported alignment score, if available. + mapq: The aligner-reported probability of an incorrect mapping, if available. + + See: + - [BWA User Manual](https://bio-bwa.sourceforge.net/bwa.shtml) + - [https://github.com/lh3/bwa/pull/292](https://github.com/lh3/bwa/pull/292) + - [https://github.com/lh3/bwa/pull/293](https://github.com/lh3/bwa/pull/293) + - [https://github.com/lh3/bwa/pull/355](https://github.com/lh3/bwa/pull/355) + """ + + reference_name: str + reference_start: int + is_forward: bool + cigar: Cigar + edit_distance: int + alignment_score: Optional[int] = None + mapq: Optional[int] = None + + def __post_init__(self) -> None: + """Perform post-initialization validation on this dataclass.""" + if self.reference_start < 0: + raise ValueError(f"Start cannot be less zero! Found: {self.reference_start}") + if self.edit_distance < 0: + raise ValueError(f"Edit distance cannot be less zero! Found: {self.edit_distance}") + if self.alignment_score is not None and self.alignment_score < 0: + raise ValueError(f"Alignment score cannot be less zero! Found: {self.alignment_score}") + if self.mapq is not None and self.mapq < 0: + raise ValueError(f"Mapping quality cannot be less zero! Found: {self.mapq}") + + @classmethod + def from_tag_part(cls, part: str) -> "SecondaryAlignment": + """Build a secondary alignment from a single `XA` or `XB` tag part. + + Args: + part: A single element in an `XA` or `XB` tag value. + """ + fields: list[str] = part.rstrip(",").split(",") + + if len(fields) == 4: + reference_name, stranded_start, cigar, edit_distance = fields + alignment_score = None + mapq = None + elif len(fields) == 6: + reference_name, stranded_start, cigar, edit_distance, alignment_score, mapq = fields + else: + raise ValueError(f"XA or XB tag part does not have 4 or 6 ',' separated fields: {part}") + + if len(stranded_start) <= 1 or stranded_start[0] not in {"+", "-"}: + raise ValueError(f"The stranded start field is malformed: {stranded_start}") + + return cls( + reference_name=reference_name, + reference_start=int(stranded_start[1:]) - 1, + is_forward=stranded_start[0] == "+", + cigar=Cigar.from_cigarstring(cigar), + edit_distance=int(edit_distance), + alignment_score=None if alignment_score is None else int(alignment_score), + mapq=None if mapq is None else int(mapq), + ) + + @classmethod + def many_from_tag(cls, value: str) -> list["SecondaryAlignment"]: + """Build many secondary alignments from a single `XA` or `XB` tag value. + + Args: + value: A single `XA` or `XB` tag value. + """ + return list(map(cls.from_tag_part, value.rstrip(";").split(";"))) + + @classmethod + def many_from_rec(cls, rec: AlignedSegment) -> list["SecondaryAlignment"]: + """Build many secondary alignments from a single SAM record. + + Args: + rec: The SAM record to generate secondary alignments from. + """ + secondaries: list["SecondaryAlignment"] = [] + if rec.has_tag("XA"): + secondaries.extend(cls.many_from_tag(cast(str, rec.get_tag("XA")))) + if rec.has_tag("XB"): + secondaries.extend(cls.many_from_tag(cast(str, rec.get_tag("XB")))) + return secondaries + + @classmethod + def many_sam_from_rec(cls, rec: AlignedSegment) -> Iterator[AlignedSegment]: + """Build many SAM secondary alignments from a single SAM record. + + All reconstituted secondary alignments will have the `rh` SAM tag set upon them. + + By default, the query bases and qualities of the secondary alignment will be set to the + query bases and qualities of the record that created the secondary alignments. However, if + there are hard-clips in the record used to create the secondary alignments, then this + function will set the query qualities and bases to the space-saving and/or unknown marker + `*`. A future development for this function should correctly pad-out (with No-calls) or clip + the query sequence and qualities depending on the hard-clipping found in both ends of the + source (often a primary) record and both ends of the destination (secondary) record. + + Args: + rec: The SAM record to generate secondary alignments from. + """ + if ( + rec.is_unmapped + or rec.cigarstring is None + or rec.query_sequence is None + or rec.query_qualities is None + ): + return + + for hit in cls.many_from_rec(rec): + # TODO: When the original record has hard clips we must set the bases and quals to "*". + # It would be smarter to pad/clip the sequence to be compatible with new cigar... + if "H" in rec.cigarstring: + query_sequence = NO_QUERY_BASES + query_qualities = None + elif rec.is_forward and not hit.is_forward: + query_sequence = reverse_complement(rec.query_sequence) + query_qualities = rec.query_qualities[::-1] + else: + query_sequence = rec.query_sequence + query_qualities = rec.query_qualities + + secondary = AlignedSegment(header=rec.header) + secondary.query_name = rec.query_name + secondary.reference_id = rec.header.get_tid(hit.reference_name) + secondary.reference_name = hit.reference_name + secondary.reference_start = hit.reference_start + secondary.mapping_quality = 0 if hit.mapq is None else hit.mapq + secondary.cigarstring = str(hit.cigar) + secondary.query_sequence = query_sequence + secondary.query_qualities = query_qualities + secondary.is_read1 = rec.is_read1 + secondary.is_read2 = rec.is_read2 + secondary.is_duplicate = rec.is_duplicate + secondary.is_paired = rec.is_paired + secondary.is_proper_pair = False + secondary.is_qcfail = rec.is_qcfail + secondary.is_forward = hit.is_forward + secondary.is_secondary = True + secondary.is_supplementary = False + secondary.is_mapped = True + + # NB: mate information on a secondary alignment points to mate/next primary alignment. + secondary.next_reference_id = rec.next_reference_id + secondary.next_reference_name = rec.next_reference_name + secondary.next_reference_start = rec.next_reference_start + secondary.mate_is_mapped = rec.mate_is_mapped + secondary.mate_is_reverse = rec.mate_is_reverse + secondary.set_tag("MC", rec.get_tag("MC") if rec.has_tag("MC") else None) + secondary.set_tag("MQ", rec.get_tag("MQ") if rec.has_tag("MQ") else None) + secondary.set_tag("ms", rec.get_tag("ms") if rec.has_tag("ms") else None) + + # NB: set some optional but highly recommended SAM tags on the secondary alignment. + secondary.set_tag("AS", hit.alignment_score) + secondary.set_tag("NM", hit.edit_distance) + secondary.set_tag("RG", rec.get_tag("RG") if rec.has_tag("RG") else None) + secondary.set_tag("RX", rec.get_tag("RX") if rec.has_tag("RX") else None) + + # NB: set a tag that indicates this alignment was a reconstituted secondary alignment. + secondary.set_tag("rh", True) + + yield secondary + + @classmethod + def add_to_template(cls, template: Template) -> Template: + """Rebuild a template by adding secondary alignments from all R1/R2 `XA` and `XB` tags.""" + r1_secondaries = iter([]) if template.r1 is None else cls.many_sam_from_rec(template.r1) + r2_secondaries = iter([]) if template.r2 is None else cls.many_sam_from_rec(template.r2) + return Template.build(chain(template.all_recs(), r1_secondaries, r2_secondaries)) + + @cached_property + def reference_end(self) -> int: + """Returns the 0-based half-open end coordinate of this secondary alignment.""" + return self.reference_start + self.cigar.length_on_target() diff --git a/tests/fgpyo/sam/test_secondary_alignment.py b/tests/fgpyo/sam/test_secondary_alignment.py new file mode 100644 index 0000000..e2d66e1 --- /dev/null +++ b/tests/fgpyo/sam/test_secondary_alignment.py @@ -0,0 +1,444 @@ +from typing import Any + +import pytest + +from fgpyo.sam import NO_QUERY_BASES +from fgpyo.sam import Cigar +from fgpyo.sam import SecondaryAlignment +from fgpyo.sam import Template +from fgpyo.sam import sum_of_base_qualities +from fgpyo.sam.builder import SamBuilder +from fgpyo.sequence import reverse_complement + + +@pytest.mark.parametrize( + ["kwargs", "error"], + [ + ( + { + "reference_name": "chr9", + "reference_start": -1, + "is_forward": False, + "cigar": Cigar.from_cigarstring("49M"), + "edit_distance": 4, + "alignment_score": 0, + "mapq": 30, + }, + "Start cannot be less zero! Found: -1", + ), + ( + { + "reference_name": "chr9", + "reference_start": 123232, + "is_forward": False, + "cigar": Cigar.from_cigarstring("49M"), + "edit_distance": -1, + "alignment_score": 0, + "mapq": 30, + }, + "Edit distance cannot be less zero! Found: -1", + ), + ( + { + "reference_name": "chr9", + "reference_start": 123232, + "is_forward": False, + "cigar": Cigar.from_cigarstring("49M"), + "edit_distance": 4, + "alignment_score": -1, + "mapq": 30, + }, + "Alignment score cannot be less zero! Found: -1", + ), + ( + { + "reference_name": "chr9", + "reference_start": 123232, + "is_forward": False, + "cigar": Cigar.from_cigarstring("49M"), + "edit_distance": 4, + "alignment_score": 4, + "mapq": -1, + }, + "Mapping quality cannot be less zero! Found: -1", + ), + ], +) +def test_secondary_alignment_validation(kwargs: dict[str, Any], error: str) -> None: + """Test that we raise exceptions for invalid arguments to SecondaryAlignment.""" + with pytest.raises(ValueError, match=error): + SecondaryAlignment(**kwargs) + + +@pytest.mark.parametrize( + ["part", "expected"], + [ + [ + # Test a well-formed negatively stranded XB + "chr9,-104599381,49M,4,0,30", + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=30, + ), + ], + [ + # Test a positive stranded XB and extra trailing commas + "chr3,+170653467,49M,4,0,20,,,,", + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=20, + ), + ], + [ + # Test a well-formed negatively stranded XA + "chr9,-104599381,49M,4", + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + ], + [ + # Test a positive stranded XA and extra trailing commas + "chr3,+170653467,49M,4,,,,", + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + ], + ], +) +def test_secondary_alignment_from_part(part: str, expected: SecondaryAlignment) -> None: + """Test that we can build an XA or XB from a part of the tag value.""" + assert SecondaryAlignment.from_tag_part(part) == expected + + +def test_many_from_tag_invalid_number_of_commas() -> None: + """Test that we raise an exception if we don't have 6 or 8 fields.""" + with pytest.raises( + ValueError, match="XA or XB tag part does not have 4 or 6 ',' separated fields:" + ): + SecondaryAlignment.from_tag_part("chr9,-104599381,49M") + + +@pytest.mark.parametrize( + ["secondary", "expected"], + [ + ( + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("1H49M"), + edit_distance=4, + ), + 170653466 + 49, + ), + ( + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("10M10I10D10M"), + edit_distance=4, + ), + 170653466 + 30, + ), + ], +) +def test_secondary_alignment_reference_end_property( + secondary: SecondaryAlignment, expected: int +) -> None: + """Test that we correctly calculate reference end based on start and cigar.""" + assert secondary.reference_end == expected + + +def test_xas_many_from_tag() -> None: + """Test that we can build many secondary alignments from multiple parts in an XA tag value.""" + value: str = "chr9,-104599381,49M,4;chr3,+170653467,49M,4;;;" # with trailing ';' + assert SecondaryAlignment.many_from_tag(value) == [ + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + ] + + +def test_xbs_many_from_tag() -> None: + """Test that we can build many secondary alignments from multiple parts in an XB tag value.""" + value: str = "chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;;;" # with trailing ';' + assert SecondaryAlignment.many_from_tag(value) == [ + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=30, + ), + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=20, + ), + ] + + +def test_xa_many_from_rec() -> None: + """Test that we return secondary alignments from a SAM record with multiple XAs.""" + value: str = "chr9,-104599381,49M,4;chr3,+170653467,49M,4;;;" # with trailing ';' + builder = SamBuilder() + rec = builder.add_single(chrom="chr1", start=32) + + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + + assert list(SecondaryAlignment.many_from_rec(rec)) == [ + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=None, + mapq=None, + ), + ] + + +def test_xb_many_from_rec() -> None: + """Test that we return secondary alignments from a SAM record with multiple XBs.""" + value: str = "chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;;;" # with trailing ';' + builder = SamBuilder() + rec = builder.add_single(chrom="chr1", start=32) + + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + + assert list(SecondaryAlignment.many_from_rec(rec)) == [ + SecondaryAlignment( + reference_name="chr9", + reference_start=104599380, + is_forward=False, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=30, + ), + SecondaryAlignment( + reference_name="chr3", + reference_start=170653466, + is_forward=True, + cigar=Cigar.from_cigarstring("49M"), + edit_distance=4, + alignment_score=0, + mapq=20, + ), + ] + + +def test_xa_many_sam_from_rec() -> None: + """Test that we return secondary alignments as SAM records from a record with multiple XAs.""" + value: str = "chr9,-104599381,49M,4;chr3,+170653467,49M,4;;;" # with trailing ';' + builder = SamBuilder() + rec, mate = builder.add_pair(chrom="chr1", start1=32, start2=49) + rec.set_tag("RX", "ACGT") + + assert rec.query_sequence is not None # for type narrowing + assert rec.query_qualities is not None # for type narrowing + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + first, second = list(SecondaryAlignment.many_sam_from_rec(rec)) + + assert first.reference_name == "chr9" + assert first.reference_id == rec.header.get_tid("chr9") + assert first.reference_start == 104599380 + assert not first.is_forward + assert first.query_sequence == reverse_complement(rec.query_sequence) + assert first.query_qualities == rec.query_qualities[::-1] + assert first.cigarstring == "49M" + assert not first.has_tag("AS") + assert first.get_tag("NM") == 4 + assert first.get_tag("rh") == 1 + assert first.mapping_quality == 0 + + assert second.reference_name == "chr3" + assert second.reference_id == rec.header.get_tid("chr3") + assert second.reference_start == 170653466 + assert second.is_forward + assert second.query_sequence == rec.query_sequence + assert second.query_qualities == rec.query_qualities + assert second.cigarstring == "49M" + assert not second.has_tag("AS") + assert second.get_tag("NM") == 4 + assert second.get_tag("rh") == 1 + assert second.mapping_quality == 0 + + for result in (first, second): + assert result.query_name == rec.query_name + assert result.is_read1 is rec.is_read1 + assert result.is_read2 is rec.is_read2 + assert result.is_duplicate is rec.is_duplicate + assert result.is_paired is rec.is_paired + assert not result.is_proper_pair + assert result.is_qcfail is rec.is_qcfail + assert result.is_secondary + assert not result.is_supplementary + assert result.is_mapped + + assert result.next_reference_id == mate.reference_id + assert result.next_reference_name == mate.reference_name + assert result.next_reference_start == mate.reference_start + assert result.mate_is_mapped is mate.is_mapped + assert result.mate_is_reverse is mate.is_reverse + assert result.get_tag("MC") == mate.cigarstring + assert result.get_tag("ms") == sum_of_base_qualities(mate) + assert result.get_tag("MQ") == mate.mapping_quality + assert result.get_tag("RG") == rec.get_tag("RG") + assert result.get_tag("RX") == rec.get_tag("RX") + + +def test_xb_many_sam_from_rec() -> None: + """Test that we return secondary alignments as SAM records from a record with multiple XBs.""" + value: str = "chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;;;" # with trailing ';' + builder = SamBuilder() + rec, mate = builder.add_pair(chrom="chr1", start1=32, start2=49) + rec.set_tag("RX", "ACGT") + + assert rec.query_sequence is not None # for type narrowing + assert rec.query_qualities is not None # for type narrowing + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + first, second = list(SecondaryAlignment.many_sam_from_rec(rec)) + + assert first.reference_name == "chr9" + assert first.reference_id == rec.header.get_tid("chr9") + assert first.reference_start == 104599380 + assert not first.is_forward + assert first.query_sequence == reverse_complement(rec.query_sequence) + assert first.query_qualities == rec.query_qualities[::-1] + assert first.cigarstring == "49M" + assert first.get_tag("AS") == 0 + assert first.get_tag("NM") == 4 + assert first.get_tag("rh") == 1 + assert first.mapping_quality == 30 + + assert second.reference_name == "chr3" + assert second.reference_id == rec.header.get_tid("chr3") + assert second.reference_start == 170653466 + assert second.is_forward + assert second.query_sequence == rec.query_sequence + assert second.query_qualities == rec.query_qualities + assert second.cigarstring == "49M" + assert second.get_tag("AS") == 0 + assert second.get_tag("NM") == 4 + assert second.get_tag("rh") == 1 + assert second.mapping_quality == 20 + + for result in (first, second): + assert result.query_name == rec.query_name + assert result.is_read1 is rec.is_read1 + assert result.is_read2 is rec.is_read2 + assert result.is_duplicate is rec.is_duplicate + assert result.is_paired is rec.is_paired + assert not result.is_proper_pair + assert result.is_qcfail is rec.is_qcfail + assert result.is_secondary + assert not result.is_supplementary + assert result.is_mapped + + assert result.next_reference_id == mate.reference_id + assert result.next_reference_name == mate.reference_name + assert result.next_reference_start == mate.reference_start + assert result.mate_is_mapped is mate.is_mapped + assert result.mate_is_reverse is mate.is_reverse + assert result.get_tag("MC") == mate.cigarstring + assert result.get_tag("ms") == sum_of_base_qualities(mate) + assert result.get_tag("MQ") == mate.mapping_quality + assert result.get_tag("RG") == rec.get_tag("RG") + assert result.get_tag("RX") == rec.get_tag("RX") + + +def test_many_sam_from_rec_with_hard_clips() -> None: + """Test that we can't reconstruct the bases and qualities if there are hard clips.""" + value: str = "chr9,-104599381,31M,4,0,30" + builder = SamBuilder() + rec, _ = builder.add_pair(chrom="chr1", start1=32, start2=49, cigar1="1H30M") + + assert rec.query_sequence is not None # for type narrowing + assert rec.query_qualities is not None # for type narrowing + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + + (actual,) = SecondaryAlignment.many_sam_from_rec(rec) + + assert actual.query_sequence == NO_QUERY_BASES + + +def test_add_to_template() -> None: + """Test that we add secondary alignments as SAM records to a template.""" + value: str = "chr9,-104599381,49M,4,0,30;chr3,+170653467,49M,4,0,20;;;" # with trailing ';' + builder = SamBuilder() + rec = builder.add_single(chrom="chr1", start=32) + rec.set_tag("RX", "ACGT") + + assert list(SecondaryAlignment.many_from_rec(rec)) == [] + + rec.set_tag("XB", value) + + actual = SecondaryAlignment.add_to_template(Template.build([rec])) + expected = Template.build([rec] + list(SecondaryAlignment.many_sam_from_rec(rec))) + + assert actual == expected