diff --git a/doc/cookbook/index.md b/doc/cookbook/index.md
index b4c5d190e7..088dc35258 100644
--- a/doc/cookbook/index.md
+++ b/doc/cookbook/index.md
@@ -137,14 +137,7 @@ To search for either 1 insertion or 1 deletion you can use the seqan3::search_cf
\snippet doc/tutorial/09_search/search_small_snippets.cpp error_search
-# Reading the CIGAR information into an actual alignment
-
-In SeqAn, the conversion from a CIGAR string to an alignment (two `seqan3::aligned_sequence`s) is done automatically for
-you. You can access the resulting alignment via seqan3::sam_record::alignment:
-
-\snippet doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp main
-
-# Combining sequence and alignment files
+# Reading the CIGAR information from a SAM file and constructing an alignment
This recipe can be used to:
1. Read in a FASTA file with the reference and a SAM file with the alignment
diff --git a/doc/fragments/io_sam_file_input.md b/doc/fragments/io_sam_file_input.md
index 1e28dade74..1fa7d2142e 100644
--- a/doc/fragments/io_sam_file_input.md
+++ b/doc/fragments/io_sam_file_input.md
@@ -9,13 +9,14 @@ of the file name. Constructing from a stream can be used if you have a non-file
std::istringstream. It also comes in handy, if you cannot use file-extension based detection, but know that
your input file has a certain format.
-The reference information is specific to the SAM format. The SAM format only stores a "semi-alignment" meaning that
-it has the query sequence and the cigar string representing the gap information but not the reference information.
-If you want to retrieve valid/full alignments, you need to pass the corresponding reference information:
+Passing reference information, e.g.
- ref_ids: The name of the references, e.g. "chr1", "chr2", ...
- ref_sequences: The reference sequence information **in the same order as the ref_ids**.
+comes in handy once you want to convert the CIGAR string, read from your file, into an actual alignment.
+This will be covered in the section \ref transform_cigar "Transforming the CIGAR information into an actual alignment".
+
In most cases the template parameters are deduced automatically:
\include test/snippet/io/sam_file/sam_file_input_construction_from_filename.cpp
@@ -80,6 +81,21 @@ of seqan3::sam_file_input::mapq_type.
\note But beware: with structured bindings you do need to get the order of elements correctly!
+#### Transforming the CIGAR information into an actual alignment
+\anchor transform_cigar
+
+In SeqAn, we represent an alignment as a tuple of two `seqan3::aligned_sequence`s.
+
+The conversion from a CIGAR string to an alignment can be done with the function `seqan3::alignment_from_cigar`.
+You need to pass the reference sequence with the position the read was aligned to and the read sequence. All
+of it is already in the `record` when reading a SAM file:
+
+\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.cpp
+
+The code will print the following:
+
+\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.err
+
#### Views on files
Since SeqAn files are ranges, you can also create views over files. A useful example is to filter the records
diff --git a/doc/fragments/io_sam_file_output.md b/doc/fragments/io_sam_file_output.md
index b25d602a50..d96b37d366 100644
--- a/doc/fragments/io_sam_file_output.md
+++ b/doc/fragments/io_sam_file_output.md
@@ -30,7 +30,7 @@ opening from stream it would not have.
\include test/snippet/io/sam_file/record_based_writing.cpp
-The easiest way to write to an alignment file is to use the `push_back()` member functions. These
+The easiest way to write to a SAM/BAM file is to use the `push_back()` member functions. These
work similarly to how they work on a std::vector.
You may also use a tuple like interface or the `emplace_back()`
function but this is not recommended since one would have to keep track of the
@@ -57,7 +57,7 @@ writing to another, because you don't have to configure the output file to match
\include test/snippet/io/sam_file/sam_file_output_custom_fields.cpp
-This will copy the \ref seqan3::field "seqan3::field::flag" and \ref seqan3::field "seqan3::field::ref_offset" value
+This will copy the \ref seqan3::field "seqan3::field::flag" and \ref seqan3::field "seqan3::field::ref_offset" value
into the new output file.
\note Note that the other SAM columns in the output file will have a default value, so unless you specify to read
diff --git a/doc/tutorial/10_sam_file/index.md b/doc/tutorial/10_sam_file/index.md
index 1e208ea220..b577c6e9c2 100644
--- a/doc/tutorial/10_sam_file/index.md
+++ b/doc/tutorial/10_sam_file/index.md
@@ -58,19 +58,19 @@ differences in most use cases.
To make things clearer, here is the table of SAM columns and the corresponding fields of a SAM file record:
-| # | SAM Column ID | FIELD name | seqan3::field |
-|:--:|:--------------|:-----------------------------------------------------------------------------------------------------|:-------------------------------------------------|
-| 1 | QNAME | seqan3::sam_record::id | seqan3::field::id |
-| 2 | FLAG | seqan3::sam_record::flag | seqan3::field::flag |
-| 3 | RNAME | seqan3::sam_record::reference_id | seqan3::field::ref_id |
-| 4 | POS | seqan3::sam_record::reference_position | seqan3::field::ref_offset |
-| 5 | MAPQ | seqan3::sam_record::mapping_quality | seqan3::field::mapq |
-| 6 | CIGAR | implicitly stored in seqan3::sam_record::alignment
explicitly stored in seqan3::sam_record::cigar_sequence | seqan3::field::alignment
seqan3::field::cigar |
-| 7 | RNEXT | seqan3::sam_record::mate_reference_id | seqan3::field::mate |
-| 8 | PNEXT | seqan3::sam_record::mate_position | seqan3::field::mate |
-| 9 | TLEN | seqan3::sam_record::template_length | seqan3::field::mate |
-| 10 | SEQ | seqan3::sam_record::sequence | seqan3::field::seq |
-| 11 | QUAL | seqan3::sam_record::base_qualities | seqan3::field::qual |
+| # | SAM Column ID | FIELD name | seqan3::field |
+|:--:|:--------------|:---------------------------------------|:------------------------- |
+| 1 | QNAME | seqan3::sam_record::id | seqan3::field::id |
+| 2 | FLAG | seqan3::sam_record::flag | seqan3::field::flag |
+| 3 | RNAME | seqan3::sam_record::reference_id | seqan3::field::ref_id |
+| 4 | POS | seqan3::sam_record::reference_position | seqan3::field::ref_offset |
+| 5 | MAPQ | seqan3::sam_record::mapping_quality | seqan3::field::mapq |
+| 6 | CIGAR | seqan3::sam_record::cigar_sequence | seqan3::field::cigar |
+| 7 | RNEXT | seqan3::sam_record::mate_reference_id | seqan3::field::mate |
+| 8 | PNEXT | seqan3::sam_record::mate_position | seqan3::field::mate |
+| 9 | TLEN | seqan3::sam_record::template_length | seqan3::field::mate |
+| 10 | SEQ | seqan3::sam_record::sequence | seqan3::field::seq |
+| 11 | QUAL | seqan3::sam_record::base_qualities | seqan3::field::qual |
SAM files provide following additional fields:
* seqan3::sam_record::sequence_position (seqan3::field::offset)
@@ -138,14 +138,12 @@ Average: 27.4
\endsolution
-# Alignment representation in the SAM format
-
-In SeqAn, we represent an alignment as a tuple of two `seqan3::aligned_sequence`s.
+# Alignment representation in SAM/BAM files
The SAM format is the common output format of read mappers where you align short read sequences
to one or more large reference sequences.
In fact, the SAM format stores those alignment information only partially:
-It **does not store the reference sequence** but only the read sequence and a *CIGAR* string
+It **does not store the reference sequence** but only the query/read sequence and a *CIGAR* string
representing the alignment based on the read.
Take this SAM record as an example:
@@ -183,26 +181,17 @@ into a std::vector\:
\snippet doc/tutorial/10_sam_file/sam_file_read_cigar.cpp main
-## Reading the CIGAR information into an actual alignment
-
-In SeqAn, the conversion from a CIGAR string to an alignment (two seqan3::aligned_sequence's) is done automatically for you.
-You can access it by accessing seqan3::sam_record::alignment from the record:
-
-\snippet doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp main
+## Transforming the CIGAR information into an alignment
-In the example above, you can only safely access the aligned read.
-
-\attention The **unknown** aligned reference sequence at the first position in the alignment tuple cannot be accessed
- (e.g. via the `operator[]`). It is represented by a dummy type that throws on access.
+In SeqAn, we represent an alignment as a tuple of two `seqan3::aligned_sequence`s.
-Although the SAM format does not handle reference sequence information,
-you can provide these information to the seqan3::sam_file_input which automatically fills the alignment object.
-You can pass reference ids and reference sequences as additional constructor parameters:
+The conversion from a CIGAR string to an alignment can be done with the function `seqan3::alignment_from_cigar`.
+You need to pass the reference sequence with the position the read was aligned to and the read sequence:
-\snippet doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp main
+\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.cpp
The code will print the following:
-\include doc/tutorial/10_sam_file/sam_file_sam_record.err
+\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.err
\assignment{Assignment 2: Combining sequence and alignment files}
@@ -218,7 +207,7 @@ Only use
* seqan3::sam_record::id,
* seqan3::sam_record::reference_id,
* seqan3::sam_record::mapping_quality, and
-* seqan3::sam_record::alignment.
+* seqan3::sam_record::cigar.
With that information do the following:
* Filter the alignment records and only take those with a mapping quality >= 30.
@@ -226,6 +215,12 @@ With that information do the following:
* For the resulting alignments, print which read was mapped against which reference id and
the number of `seqan3::gap`s in each sequence (aligned reference and read sequence).
+\hint
+Given your reference sequences, you need to know which reference sequence to use for the alignment.
+For this purpose, you can access `record.reference_id()` which gives you the position of the respective reference
+sequence.
+\endhint
+
Your program should print the following:
```
diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp b/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp
deleted file mode 100644
index 3cee4dcdc6..0000000000
--- a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include
-seqan3::test::create_temporary_snippet_file example_sam{"example.sam",
- R"//![sam_file](
-@HD VN:1.6 SO:coordinate
-@SQ SN:ref LN:45
-r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *
-r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA *
-r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *
-r003 2064 ref 29 17 5M * 0 0 TAGGC *
-r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1
-)//![sam_file]"}; // std::filesystem::current_path() / "example.sam" will be deleted after the execution
-
-//![main]
-#include
-
-int main()
-{
- using namespace seqan3::literals;
-
- auto filename = std::filesystem::current_path() / "example.sam";
-
- std::vector ref_ids{"ref"}; // list of one reference name
- std::vector ref_sequences{"AGAGTTCGAGATCGAGGACTAGCGACGAGGCAGCGAGCGATCGAT"_dna5};
-
- seqan3::sam_file_input fin{filename, ref_ids, ref_sequences};
-
- for (auto & record : fin)
- seqan3::debug_stream << record.alignment() << '\n'; // Now you can print the whole alignment!
-}
-//![main]
diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err b/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err
deleted file mode 100644
index 7079da1389..0000000000
--- a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err
+++ /dev/null
@@ -1,5 +0,0 @@
-(CGAGATCG--AGGACTAG,TTAGATAAAGGATA-CTG)
-(AGATCG,AGCTAA)
-(GGACTAGCGACGAGGCAGCGAGCGA,ATAGCT--------------TCAGC)
-(GGCAG,TAGGC)
-(GCGATCGAT,CAGCGGCAT)
diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp b/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp
deleted file mode 100644
index bab3e97c40..0000000000
--- a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include
-seqan3::test::create_temporary_snippet_file example_sam{"example.sam",
- R"//![sam_file](
-@HD VN:1.6 SO:coordinate
-@SQ SN:ref LN:45
-r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *
-r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA *
-r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *
-r003 2064 ref 29 17 5M * 0 0 TAGGC *
-r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1
-)//![sam_file]"}; // std::filesystem::current_path() / "example.sam" will be deleted after the execution
-
-//![main]
-#include
-
-int main()
-{
- auto filename = std::filesystem::current_path() / "example.sam";
-
- seqan3::sam_file_input fin{filename};
-
- for (auto & record : fin)
- seqan3::debug_stream << record.id() << ": " << std::get<1>(record.alignment()) << '\n';
-}
-//![main]
diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err b/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err
deleted file mode 100644
index 1861652a3b..0000000000
--- a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err
+++ /dev/null
@@ -1,5 +0,0 @@
-r001: TTAGATAAAGGATA-CTG
-r003: AGCTAA
-r004: ATAGCT--------------TCAGC
-r003: TAGGC
-r001: CAGCGGCAT
diff --git a/doc/tutorial/10_sam_file/sam_file_solution2.cpp b/doc/tutorial/10_sam_file/sam_file_solution2.cpp
index 5144fe1caf..851f23bae5 100644
--- a/doc/tutorial/10_sam_file/sam_file_solution2.cpp
+++ b/doc/tutorial/10_sam_file/sam_file_solution2.cpp
@@ -25,6 +25,7 @@ r003 2064 chr2 18 10 5M * 0 0 TAGGC *
#include
#include
+#include
#include
#include
#include
@@ -59,14 +60,19 @@ int main()
for (auto & record : mapping_file | mapq_filter)
{
+ auto alignment = seqan3::alignment_from_cigar(record.cigar_sequence(),
+ reference_sequences[record.reference_id().value()],
+ record.reference_position().value(),
+ record.sequence());
+
// as loop
size_t sum_reference{};
- for (auto const & char_reference : std::get<0>(record.alignment()))
+ for (auto const & char_reference : std::get<0>(alignment))
if (char_reference == seqan3::gap{})
++sum_reference;
// or via std::ranges::count
- size_t sum_read = std::ranges::count(std::get<1>(record.alignment()), seqan3::gap{});
+ size_t sum_read = std::ranges::count(std::get<1>(alignment), seqan3::gap{});
// The reference_id is ZERO based and an optional. -1 is represented by std::nullopt (= reference not known).
std::optional reference_id = record.reference_id();
diff --git a/doc/tutorial/10_sam_file/sam_file_writing.cpp b/doc/tutorial/10_sam_file/sam_file_writing.cpp
index c2af9ceecd..591538d5d9 100644
--- a/doc/tutorial/10_sam_file/sam_file_writing.cpp
+++ b/doc/tutorial/10_sam_file/sam_file_writing.cpp
@@ -5,12 +5,6 @@ seqan3::test::create_temporary_snippet_file example_sam{"out.sam", ""};
//![main]
#include
-using aligned_sequence_type = std::vector>;
-using alignment_type = std::pair;
-
-using types = seqan3::type_list, std::string, alignment_type>;
-using fields = seqan3::fields;
-
int main()
{
using namespace seqan3::literals;
@@ -18,6 +12,9 @@ int main()
auto filename = std::filesystem::current_path() / "out.sam";
seqan3::sam_file_output fout{filename};
+
+ using types = seqan3::type_list, std::string, std::vector>;
+ using fields = seqan3::fields;
using sam_record_type = seqan3::sam_record;
// write the following to the file
@@ -25,15 +22,10 @@ int main()
sam_record_type record{};
record.id() = "r001";
record.sequence() = "ACGTACGT"_dna5;
- auto & [reference_sequence, read_sequence] = record.alignment();
-
- // ACGT--GTTT
- seqan3::assign_unaligned(reference_sequence, "ACGTGTTT"_dna5);
- seqan3::insert_gap(reference_sequence, reference_sequence.begin() + 4, 2);
-
- // ACGTACGT--
- seqan3::assign_unaligned(read_sequence, record.sequence());
- seqan3::insert_gap(read_sequence, read_sequence.end(), 2);
+ record.cigar_sequence() = {{4, 'M'_cigar_operation},
+ {2, 'I'_cigar_operation},
+ {2, 'M'_cigar_operation},
+ {2, 'D'_cigar_operation}};
fout.push_back(record);
}
diff --git a/doc/tutorial/11_read_mapper/read_mapper_step4.cpp b/doc/tutorial/11_read_mapper/read_mapper_step4.cpp
index 449ebd6143..432605fff6 100644
--- a/doc/tutorial/11_read_mapper/read_mapper_step4.cpp
+++ b/doc/tutorial/11_read_mapper/read_mapper_step4.cpp
@@ -4,6 +4,7 @@
# include
# include
+# include
# include
# include
# include
@@ -53,7 +54,7 @@ void map_reads(std::filesystem::path const & query_path,
seqan3::field::id,
seqan3::field::ref_id,
seqan3::field::ref_offset,
- seqan3::field::alignment,
+ seqan3::field::cigar,
seqan3::field::qual,
seqan3::field::mapq>{}};
//! [sam_file_output]
@@ -80,7 +81,7 @@ void map_reads(std::filesystem::path const & query_path,
for (auto && alignment : seqan3::align_pairwise(std::tie(text_view, query), align_config))
{
- auto aligned_seq = alignment.alignment();
+ auto cigar = seqan3::cigar_from_alignment(alignment.alignment());
size_t ref_offset = alignment.sequence1_begin_position() + 2 + start;
size_t map_qual = 60u + alignment.score();
@@ -88,7 +89,7 @@ void map_reads(std::filesystem::path const & query_path,
record.id(),
storage.ids[result.reference_id()],
ref_offset,
- aligned_seq,
+ cigar,
record.base_qualities(),
map_qual);
}
diff --git a/include/seqan3/alphabet/cigar/cigar.hpp b/include/seqan3/alphabet/cigar/cigar.hpp
index e11131faa7..87089953ec 100644
--- a/include/seqan3/alphabet/cigar/cigar.hpp
+++ b/include/seqan3/alphabet/cigar/cigar.hpp
@@ -84,12 +84,10 @@ class cigar : public alphabet_tuple_base
*/
@@ -41,60 +41,6 @@ struct view_equality_fn
}
};
-/*!\brief Compares two seqan3::aligned_sequence values and returns their cigar operation.
- * \ingroup io_sam_file
- * \tparam reference_char_type Must be equality comparable to seqan3::gap.
- * \tparam query_char_type Must be equality comparable to seqan3::gap.
- * \param reference_char The aligned character of the reference to compare.
- * \param query_char The aligned character of the query to compare.
- * \param extended_cigar Whether to print the extended cigar alphabet or not. See cigar operation.
- * \returns A seqan3::cigar::operation representing the alignment operation between the two values.
- *
- * \details
- *
- * \note The resulting cigar operation is based on the query character (`query_char`).
- *
- * ### Example:
- *
- * The following alignment column shows the reference char ('C') on top and a
- * gap for the query char at the bottom.
- * ```
- * ... C ...
- * |
- * ... - ...
- * ```
- * In this case, the function seqan3::detail::map_aligned_values_to_cigar_op will return
- * 'D' since the query char is "deleted".
- *
- * The next alignment column shows the reference char ('C') on top and a
- * query char ('G') at the bottom.
- * ```
- * ... C ...
- * |
- * ... G ...
- * ```
- * In this case, the function seqan3::detail::map_aligned_values_to_cigar_op will return
- * 'M', for the basic cigar the two bases are aligned, while
- * in the extended cigar alphabet (`extended_cigar` = `true`) the function
- * will return an 'X' since the bases are aligned but are not
- * equal.
- * \sa seqan3::aligned_sequence
- */
-template
-[[nodiscard]] constexpr cigar::operation map_aligned_values_to_cigar_op(reference_char_type const reference_char,
- query_char_type const query_char,
- bool const extended_cigar)
- requires seqan3::detail::weakly_equality_comparable_with
- && seqan3::detail::weakly_equality_comparable_with
-{
- constexpr std::array operators{'M', 'D', 'I', 'P', 'X', '='}; // contains the possible cigar operators.
- uint8_t key = (static_cast(reference_char == gap{}) << 1) | static_cast(query_char == gap{});
- if (extended_cigar && (key == 0)) // in extended format refine the substitution operator to match/mismatch.
- key |= ((1 << 2) | static_cast(query_char == reference_char)); // maps to [4, 5].
-
- return assign_char_to(operators[key], cigar::operation{});
-}
-
/*!\brief Updates the sequence lengths by `cigar_count` depending on the cigar operation `op`.
* \ingroup io_sam_file
* \param[in, out] ref_length The reference sequence's length.
@@ -174,100 +120,6 @@ SEQAN3_WORKAROUND_LITERAL std::tuple, int32_t, int32_t> parse
return {operations, ref_length, seq_length};
}
-/*!\brief Creates a cigar string (SAM format) given a seqan3::detail::pairwise_alignment represented by two
- * seqan3::aligned_sequence's.
- * \ingroup io_sam_file
- *
- * \tparam alignment_type Must model seqan3::detail::pairwise_alignment.
- * \param alignment The alignment, represented by a pair of aligned sequences,
- * to be transformed into cigar vector based on the
- * second (query) sequence.
- * \param query_start_pos The start position of the alignment in the query
- * sequence indicating soft-clipping.
- * \param query_end_pos The end position of the alignment in the query
- * sequence indicating soft-clipping.
- * \param extended_cigar Whether to print the extended cigar alphabet or not. See cigar operation.
- * \returns A std::vector\ representing the alignment.
- *
- * \details
- *
- * \note The resulting cigar_vector is based on the query sequence, which is the second sequence in the `alignment`
- * pair.
- *
- * ### Example:
- *
- * Given the following alignment reference sequence on top and the query sequence at
- * the bottom:
- * ```
- * ATGG--CGTAGAGC
- * |||X |||X| |
- * ATGCCCCGTTG--C
- * ```
- * In this case, the function seqan3::detail::get_cigar_vector will return
- * the following cigar vector: "[('M',4),('I',2),('M',5),('D',2),('M',1)]".
- * The extended cigar string would look like this: "[('=',3)('X',1)('I',2)('=',3)('X',1)('=',1)('D',2)('=',1)]".
- *
- * \include test/snippet/io/sam_file/get_cigar_vector.cpp
- *
- * \sa seqan3::aligned_sequence
- */
-template
-[[nodiscard]] inline std::vector get_cigar_vector(alignment_type && alignment,
- uint32_t const query_start_pos = 0,
- uint32_t const query_end_pos = 0,
- bool const extended_cigar = false)
-{
- using std::get;
-
- auto & ref_seq = get<0>(alignment);
- auto & query_seq = get<1>(alignment);
-
- if (ref_seq.size() != query_seq.size())
- throw std::logic_error{"The aligned sequences must have the same length."};
-
- std::vector result{};
-
- if (!ref_seq.size())
- return result; // return empty string if sequences are empty
-
- // Add (S)oft-clipping at the start of the read
- if (query_start_pos)
- result.emplace_back(query_start_pos, 'S'_cigar_operation);
-
- // Create cigar string from alignment
- // -------------------------------------------------------------------------
- // initialize first operation and count value:
- cigar::operation operation{map_aligned_values_to_cigar_op(ref_seq[0], query_seq[0], extended_cigar)};
- uint32_t count{0};
-
- // go through alignment columns
- for (auto column : views::zip(ref_seq, query_seq))
- {
- cigar::operation next_op =
- map_aligned_values_to_cigar_op(std::get<0>(column), std::get<1>(column), extended_cigar);
-
- if (operation == next_op)
- {
- ++count;
- }
- else
- {
- result.emplace_back(count, operation);
- operation = next_op;
- count = 1;
- }
- }
-
- // append last cigar element
- result.emplace_back(count, operation);
-
- // Add (S)oft-clipping at the end of the read
- if (query_end_pos)
- result.emplace_back(query_end_pos, 'S'_cigar_operation);
-
- return result;
-}
-
/*!\brief Transforms a vector of cigar elements into a string representation.
* \ingroup io_sam_file
* \param cigar_vector The std::vector of seqan3::cigar elements to be transformed into a std::string.
@@ -284,48 +136,6 @@ template
return result;
}
-/*!\brief Creates a cigar string (SAM format) given a seqan3::detail::pairwise_alignment.
- * \ingroup io_sam_file
- *
- * \tparam alignment_type Must model seqan3::detail::pairwise_alignment.
- * \param alignment The alignment, represented by a seqan3::pair_like of seqan3::aligned_sequence's,
- * to be transformed into cigar vector based on the
- * second (query) sequence.
- * \param query_start_pos The start position of the alignment in the query
- * sequence indicating soft-clipping.
- * \param query_end_pos The end position of the alignment in the query
- * sequence indicating soft-clipping.
- * \param extended_cigar Whether to print the extended cigar alphabet or not. See cigar operation.
- * \returns A std::string representing the alignment as a cigar string.
- *
- * \details
- *
- * \note The resulting cigar_vector is based on the query sequence, which is the second sequence in the `alignment`
- * pair.
- *
- * ### Example:
- *
- * The following alignment reference sequence on top and the query sequence at
- * the bottom.
- * ```
- * ATGG--CGTAGAGC
- * |||X |||X| |
- * ATGCCCCGTTG--C
- * ```
- * In this case, the function seqan3::detail::get_cigar_string will return
- * the following cigar string when printed: "4M2I5M2D1M". The extended cigar
- * string would look like this: "3=1X2I3=1X1=2D1=".
- * \sa seqan3::aligned_sequence
- */
-template
-[[nodiscard]] inline std::string get_cigar_string(alignment_type && alignment,
- uint32_t const query_start_pos = 0,
- uint32_t const query_end_pos = 0,
- bool const extended_cigar = false)
-{
- return get_cigar_string(get_cigar_vector(alignment, query_start_pos, query_end_pos, extended_cigar));
-}
-
/*!\brief Transforms an alignment represented by two seqan3::aligned_sequence's into the corresponding cigar string.
* \ingroup io_sam_file
*
@@ -369,79 +179,6 @@ template
-inline void alignment_from_cigar(alignment_type & alignment, std::vector const & cigar_vector)
-{
- using std::get;
- auto current_ref_pos = std::ranges::begin(get<0>(alignment));
- auto current_read_pos = std::ranges::begin(get<1>(alignment));
-
- for (auto [cigar_count, cigar_operation] : cigar_vector)
- {
- // ignore since alignment shall contain sliced sequences
- if (('S'_cigar_operation == cigar_operation) || ('H'_cigar_operation == cigar_operation))
- continue;
-
- assert(('M'_cigar_operation == cigar_operation) || ('='_cigar_operation == cigar_operation)
- || ('X'_cigar_operation == cigar_operation) || ('D'_cigar_operation == cigar_operation)
- || ('N'_cigar_operation == cigar_operation) || ('I'_cigar_operation == cigar_operation)
- || ('P'_cigar_operation == cigar_operation)); // checked during IO
-
- if (('M'_cigar_operation == cigar_operation) || ('='_cigar_operation == cigar_operation)
- || ('X'_cigar_operation == cigar_operation))
- {
- std::ranges::advance(current_ref_pos, cigar_count);
- std::ranges::advance(current_read_pos, cigar_count);
- }
- else if (('D'_cigar_operation == cigar_operation) || ('N'_cigar_operation == cigar_operation))
- {
- // insert gaps into read
-
- assert(std::distance(current_read_pos, std::ranges::end(get<1>(alignment))) >= 0);
- current_read_pos = insert_gap(get<1>(alignment), current_read_pos, cigar_count);
- ++current_read_pos;
- std::ranges::advance(current_ref_pos, cigar_count);
- }
- else if (('I'_cigar_operation == cigar_operation)) // Insert gaps into ref
- {
- assert(std::ranges::distance(current_ref_pos, std::ranges::end(get<0>(alignment))) >= 0);
- current_ref_pos = insert_gap(get<0>(alignment), current_ref_pos, cigar_count);
- ++current_ref_pos;
- std::ranges::advance(current_read_pos, cigar_count);
- }
- else if (('P'_cigar_operation == cigar_operation)) // skip padding
- {
- current_ref_pos = insert_gap(get<0>(alignment), current_ref_pos, cigar_count);
- ++current_ref_pos;
-
- current_read_pos = insert_gap(get<1>(alignment), current_read_pos, cigar_count);
- ++current_read_pos;
- }
- }
-}
-
//!\brief A functor that always throws when calling `operator()` (needed for the alignment "dummy" sequence).
//!\ingroup io_sam_file
struct access_restrictor_fn
diff --git a/include/seqan3/io/sam_file/detail/format_sam_base.hpp b/include/seqan3/io/sam_file/detail/format_sam_base.hpp
index 88362d7468..64b55286d9 100644
--- a/include/seqan3/io/sam_file/detail/format_sam_base.hpp
+++ b/include/seqan3/io/sam_file/detail/format_sam_base.hpp
@@ -74,14 +74,6 @@ class format_sam_base
header_type & header,
ref_seqs_type & /*tag*/);
- template
- void construct_alignment(align_type & align,
- std::vector & cigar_vector,
- [[maybe_unused]] int32_t rid,
- [[maybe_unused]] ref_seqs_type & ref_seqs,
- [[maybe_unused]] int32_t ref_start,
- size_t ref_length);
-
void transfer_soft_clipping_to(std::vector const & cigar_vector, int32_t & sc_begin, int32_t & sc_end) const;
template
@@ -198,68 +190,6 @@ inline void format_sam_base::transfer_soft_clipping_to(std::vector const
sc_end = cigar_count_at(second_last_index);
}
-/*!\brief Construct the field::alignment depending on the given information.
- * \tparam align_type The alignment type.
- * \tparam ref_seqs_type The type of reference sequences (might decay to ignore).
- * \param[in,out] align The alignment (pair of aligned sequences) to fill.
- * \param[in] cigar_vector The cigar information to convert to an alignment.
- * \param[in] rid The index of the reference sequence in header.ref_ids().
- * \param[in] ref_seqs The reference sequence information.
- * \param[in] ref_start The start position of the alignment in the reference sequence.
- * \param[in] ref_length The length of the aligned reference sequence.
- */
-template
-inline void format_sam_base::construct_alignment(align_type & align,
- std::vector & cigar_vector,
- [[maybe_unused]] int32_t rid,
- [[maybe_unused]] ref_seqs_type & ref_seqs,
- [[maybe_unused]] int32_t ref_start,
- size_t ref_length)
-{
- if (rid > -1 && ref_start > -1 && // read is mapped
- !cigar_vector.empty() && // alignment field was not empty
- !std::ranges::empty(get<1>(align))) // seq field was not empty
- {
- if constexpr (!detail::decays_to_ignore_v)
- {
- assert(static_cast(ref_start + ref_length) <= std::ranges::size(ref_seqs[rid]));
- // copy over unaligned reference sequence part
- assign_unaligned(get<0>(align), ref_seqs[rid] | views::slice(ref_start, ref_start + ref_length));
- }
- else
- {
- using unaligned_t = std::remove_cvref_t(align))>>;
- auto dummy_seq = views::repeat_n(std::ranges::range_value_t{}, ref_length)
- | std::views::transform(detail::access_restrictor_fn{});
- static_assert(std::same_as,
- "No reference information was given so the type of the first alignment tuple position"
- "must have an unaligned sequence type of a dummy sequence ("
- "views::repeat_n(dna5{}, size_t{}) | "
- "std::views::transform(detail::access_restrictor_fn{}))");
-
- assign_unaligned(get<0>(align), dummy_seq); // assign dummy sequence
- }
-
- // insert gaps according to the cigar information
- detail::alignment_from_cigar(align, cigar_vector);
- }
- else // not enough information for an alignment, assign an empty view/dummy_sequence
- {
- if constexpr (!detail::decays_to_ignore_v) // reference info given
- {
- assert(std::ranges::size(ref_seqs) > 0); // we assume that the given ref info is not empty
- assign_unaligned(get<0>(align), ref_seqs[0] | views::slice(0, 0));
- }
- else
- {
- using unaligned_t = std::remove_cvref_t(align))>>;
- assign_unaligned(get<0>(align),
- views::repeat_n(std::ranges::range_value_t{}, 0)
- | std::views::transform(detail::access_restrictor_fn{}));
- }
- }
-}
-
/*!\brief Reads std::byte fields using std::from_chars.
* \tparam stream_view_t The type of the stream as a view.
*
diff --git a/include/seqan3/io/sam_file/format_bam.hpp b/include/seqan3/io/sam_file/format_bam.hpp
index 3f27e43a7e..6be24448b4 100644
--- a/include/seqan3/io/sam_file/format_bam.hpp
+++ b/include/seqan3/io/sam_file/format_bam.hpp
@@ -77,7 +77,6 @@ class format_bam : private detail::format_sam_base
typename ref_seq_type,
typename ref_id_type,
typename ref_offset_type,
- typename align_type,
typename cigar_type,
typename flag_type,
typename mapq_type,
@@ -98,7 +97,6 @@ class format_bam : private detail::format_sam_base
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
- align_type & align,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
@@ -113,7 +111,6 @@ class format_bam : private detail::format_sam_base
typename id_type,
typename ref_seq_type,
typename ref_id_type,
- typename align_type,
typename cigar_type,
typename qual_type,
typename mate_type,
@@ -128,7 +125,6 @@ class format_bam : private detail::format_sam_base
[[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
[[maybe_unused]] ref_id_type && ref_id,
[[maybe_unused]] std::optional ref_offset,
- [[maybe_unused]] align_type && align,
[[maybe_unused]] cigar_type && cigar_vector,
[[maybe_unused]] sam_flag const flag,
[[maybe_unused]] uint8_t const mapq,
@@ -255,7 +251,6 @@ template tmp_cigar_vector{};
- [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query
+ [[maybe_unused]] int32_t offset_tmp{}; // needed in case the cigar string was stored in the tag dictionary
+ [[maybe_unused]] int32_t ref_length{}; // needed in case the cigar string was stored in the tag dictionary
// Header
// -------------------------------------------------------------------------------------------------------------
@@ -434,11 +425,12 @@ format_bam::read_alignment_record(stream_type & stream,
// read cigar string
// -------------------------------------------------------------------------------------------------------------
- if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v)
+ if constexpr (!detail::decays_to_ignore_v)
{
- std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op);
- transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
- // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying
+ int32_t seq_length{};
+ std::tie(cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op);
+ int32_t soft_clipping_end{};
+ transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end);
}
else
{
@@ -468,56 +460,7 @@ format_bam::read_alignment_record(stream_type & stream,
++std::ranges::begin(stream_view);
};
- if constexpr (!detail::decays_to_ignore_v)
- {
- static_assert(sequence_container(align))>>,
- "If you want to read ALIGNMENT but not SEQ, the alignment"
- " object must store a sequence container at the second (query) position.");
-
- if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given
- {
- assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end)); // sanity check
- using alph_t = std::ranges::range_value_t(align))>;
- constexpr auto from_dna16 = detail::convert_through_char_representation;
-
- get<1>(align).reserve(seq_length);
-
- auto tmp_iter = std::ranges::begin(seq_stream);
- std::ranges::advance(tmp_iter, offset_tmp / 2); // skip soft clipped bases at the beginning
-
- if (offset_tmp & 1)
- {
- get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
- ++tmp_iter;
- --seq_length;
- }
-
- for (size_t i = (seq_length / 2); i > 0; --i)
- {
- get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
- get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
- ++tmp_iter;
- }
-
- if (seq_length & 1)
- {
- get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
- ++tmp_iter;
- --soft_clipping_end;
- }
-
- std::ranges::advance(tmp_iter, (soft_clipping_end + !(seq_length & 1)) / 2);
- }
- else
- {
- skip_sequence_bytes();
- get<1>(align) = std::remove_reference_t(align))>{}; // assign empty container
- }
- }
- else
- {
- skip_sequence_bytes();
- }
+ skip_sequence_bytes();
}
else
{
@@ -537,14 +480,6 @@ format_bam::read_alignment_record(stream_type & stream,
seq.push_back(from_dna16[to_rank(d)]);
++std::ranges::begin(stream_view);
}
-
- if constexpr (!detail::decays_to_ignore_v)
- {
- assign_unaligned(get<1>(align),
- seq
- | views::slice(static_cast>(offset_tmp),
- std::ranges::distance(seq) - soft_clipping_end));
- }
}
}
@@ -578,7 +513,7 @@ format_bam::read_alignment_record(stream_type & stream,
// DONE READING - wrap up
// -------------------------------------------------------------------------------------------------------------
- if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v)
+ if constexpr (!detail::decays_to_ignore_v)
{
// Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the
// alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger
@@ -611,29 +546,15 @@ format_bam::read_alignment_record(stream_type & stream,
"record.")};
auto cigar_view = std::views::all(std::get(it->second));
- std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(cigar_view);
- offset_tmp = soft_clipping_end = 0;
- transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
+ int32_t seq_length{};
+ std::tie(cigar_vector, ref_length, seq_length) = detail::parse_cigar(cigar_view);
+ offset_tmp = 0;
+ int32_t soft_clipping_end{};
+ transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end);
tag_dict.erase(it); // remove redundant information
-
- if constexpr (!detail::decays_to_ignore_v)
- {
- assign_unaligned(
- get<1>(align),
- seq
- | views::slice(static_cast>(offset_tmp),
- std::ranges::distance(seq) - soft_clipping_end));
- }
}
}
}
-
- // Alignment object construction
- if constexpr (!detail::decays_to_ignore_v)
- construct_alignment(align, tmp_cigar_vector, core.refID, ref_seqs, core.pos, ref_length); // inherited from SAM
-
- if constexpr (!detail::decays_to_ignore_v)
- std::swap(cigar_vector, tmp_cigar_vector);
}
//!\copydoc seqan3::sam_file_output_format::write_alignment_record
@@ -643,7 +564,6 @@ template ref_offset,
- [[maybe_unused]] align_type && align,
[[maybe_unused]] cigar_type && cigar_vector,
[[maybe_unused]] sam_flag const flag,
[[maybe_unused]] uint8_t const mapq,
@@ -690,16 +609,6 @@ inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & st
"over letters that model seqan3::alphabet or an integral or a std::optional.");
}
- static_assert(tuple_like>,
- "The align object must be a std::pair of two ranges whose "
- "value_type is comparable to seqan3::gap");
-
- static_assert((std::tuple_size_v> == 2
- && std::equality_comparable_with(align))>>
- && std::equality_comparable_with(align))>>),
- "The align object must be a std::pair of two ranges whose "
- "value_type is comparable to seqan3::gap");
-
static_assert((std::ranges::forward_range && alphabet>),
"The qual object must be a std::ranges::forward_range "
"over letters that model seqan3::alphabet.");
@@ -759,38 +668,20 @@ inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & st
// ---------------------------------------------------------------------
int32_t ref_length{};
- // if alignment is non-empty, replace cigar_vector.
- // else, compute the ref_length from given cigar_vector which is needed to fill field `bin`.
+ // Compute the ref_length from given cigar_vector which is needed to fill field `bin`.
if (!std::ranges::empty(cigar_vector))
{
int32_t dummy_seq_length{};
for (auto & [count, operation] : cigar_vector)
detail::update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count);
}
- else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
- {
- ref_length = std::ranges::distance(get<1>(align));
-
- // compute possible distance from alignment end to sequence end
- // which indicates soft clipping at the end.
- // This should be replaced by a free count_gaps function for
- // aligned sequences which is more efficient if possible.
- int32_t off_end{static_cast(std::ranges::distance(seq)) - offset};
-
- for (auto chr : get<1>(align))
- if (chr == gap{})
- ++off_end;
-
- off_end -= ref_length;
- cigar_vector = detail::get_cigar_vector(align, offset, off_end);
- }
if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG
{
tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector);
cigar_vector.resize(2);
cigar_vector[0] = cigar{static_cast(std::ranges::distance(seq)), 'S'_cigar_operation};
- cigar_vector[1] = cigar{static_cast(std::ranges::distance(get<1>(align))), 'N'_cigar_operation};
+ cigar_vector[1] = cigar{static_cast(ref_length), 'N'_cigar_operation};
}
std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
diff --git a/include/seqan3/io/sam_file/format_sam.hpp b/include/seqan3/io/sam_file/format_sam.hpp
index fb44980a30..cc23316e3b 100644
--- a/include/seqan3/io/sam_file/format_sam.hpp
+++ b/include/seqan3/io/sam_file/format_sam.hpp
@@ -54,14 +54,14 @@ namespace seqan3
* [SAM format specifications](https://samtools.github.io/hts-specs/SAMv1.pdf).
* **SeqAn implements version 1.6 of the SAM specification**.
*
- * Take a look at our tutorial \ref tutorial_sam_file for a walk through of how to read alignment files.
+ * Take a look at our tutorial \ref tutorial_sam_file for a walk through of how to read SAM/BAM files.
*
* ### fields_specialisation
*
* The SAM format provides the following fields:
- * seqan3::field::alignment, seqan3::field::seq, seqan3::field::qual,
+ * seqan3::field::seq, seqan3::field::qual,
* seqan3::field::id, seqan3::field::ref_seq, seqan3::field::ref_id
- * seqan3::field::ref_ossfet, seqan3::field::offset, seqan3::field::flag,
+ * seqan3::field::ref_offset, seqan3::field::offset, seqan3::field::flag,
* seqan3::field::mapq and seqan3::field::mate.
* In addition there is the seqan3::field::header_ptr, which is usually only used internally
* to provide the range-based functionality of the file.
@@ -74,28 +74,19 @@ namespace seqan3
* Since many users will be accustomed to the columns of the SAM format, here is a
* mapping of the common SAM format columns to the SeqAn record fields:
*
- * | # | SAM Column ID | FIELD name |
- * |:--:|:--------------|:--------------------------------------------------|
- * | 1 | QNAME | seqan3::field::id |
- * | 2 | FLAG | seqan3::field::flag |
- * | 3 | RNAME | seqan3::field::ref_id |
- * | 4 | POS | seqan3::field::ref_offset |
- * | 5 | MAPQ | seqan3::field::mapq |
- * | 6 | CIGAR | implicilty stored in seqan3::field::alignment |
- * | 7 | RNEXT | seqan3::field::mate (tuple pos 0) |
- * | 8 | PNEXT | seqan3::field::mate (tuple pos 1) |
- * | 9 | TLEN | seqan3::field::mate (tuple pos 2) |
- * | 10 | SEQ | seqan3::field::seq |
- * | 11 | QUAL | seqan3::field::qual |
- *
- * The (read sequence/query) **OFFSET** will be required to store the soft
- * clipping information at the read start (end clipping will be automatically
- * deduced by how much the read sequence length + offset is larger than the
- * alignment length).
- *
- * Note: SeqAn currently does not support hard clipping. When reading SAM,
- * hard-clipping is discarded; but the resulting alignment/sequence combination
- * is still valid.
+ * | # | SAM Column ID | FIELD name |
+ * |:--:|:--------------|:----------------------------------|
+ * | 1 | QNAME | seqan3::field::id |
+ * | 2 | FLAG | seqan3::field::flag |
+ * | 3 | RNAME | seqan3::field::ref_id |
+ * | 4 | POS | seqan3::field::ref_offset |
+ * | 5 | MAPQ | seqan3::field::mapq |
+ * | 6 | CIGAR | seqan3::field::cigar |
+ * | 7 | RNEXT | seqan3::field::mate (tuple pos 0) |
+ * | 8 | PNEXT | seqan3::field::mate (tuple pos 1) |
+ * | 9 | TLEN | seqan3::field::mate (tuple pos 2) |
+ * | 10 | SEQ | seqan3::field::seq |
+ * | 11 | QUAL | seqan3::field::qual |
*
* ### Format Check
*
@@ -169,7 +160,6 @@ class format_sam : protected detail::format_sam_base
typename ref_seq_type,
typename ref_id_type,
typename ref_offset_type,
- typename align_type,
typename cigar_type,
typename flag_type,
typename mapq_type,
@@ -190,7 +180,6 @@ class format_sam : protected detail::format_sam_base
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
- align_type & align,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
@@ -205,7 +194,6 @@ class format_sam : protected detail::format_sam_base
typename id_type,
typename ref_seq_type,
typename ref_id_type,
- typename align_type,
typename qual_type,
typename mate_type,
typename tag_dict_type,
@@ -221,7 +209,6 @@ class format_sam : protected detail::format_sam_base
ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type && ref_id,
std::optional ref_offset,
- align_type && align,
std::vector const & cigar_vector,
sam_flag const flag,
uint8_t const mapq,
@@ -311,7 +298,6 @@ inline void format_sam::read_sequence_record(stream_type & stream,
std::ignore,
std::ignore,
std::ignore,
- std::ignore,
std::ignore);
}
@@ -337,7 +323,6 @@ inline void format_sam::write_sequence_record(stream_type & stream,
id_type && id,
qual_type && qualities)
{
- using default_align_t = std::pair>, std::span>>;
using default_mate_t = std::tuple, int32_t>;
sam_file_output_options output_options;
@@ -352,7 +337,6 @@ inline void format_sam::write_sequence_record(stream_type & stream,
/*ref_seq*/ std::string_view{},
/*ref_id*/ std::string_view{},
/*ref_offset*/ -1,
- /*align*/ default_align_t{},
/*cigar_vector*/ std::vector{},
/*flag*/ sam_flag::none,
/*mapq*/ 0,
@@ -374,7 +358,6 @@ template );
- // these variables need to be stored to compute the ALIGNMENT
- int32_t ref_offset_tmp{};
- std::ranges::range_value_t ref_id_tmp{};
- [[maybe_unused]] int32_t offset_tmp{};
- [[maybe_unused]] int32_t soft_clipping_end{};
- [[maybe_unused]] std::vector tmp_cigar_vector{};
- [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query
+ int32_t ref_offset_tmp{}; // needed to read the ref_offset (int) beofre storing it in std::optional
+ std::ranges::range_value_t ref_id_tmp{}; // in case mate is requested but ref_offset not
// Header
// -------------------------------------------------------------------------------------------------------------
@@ -464,13 +441,16 @@ format_sam::read_alignment_record(stream_type & stream,
// Field 6: CIGAR
// -------------------------------------------------------------------------------------------------------------
- if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v)
+ if constexpr (!detail::decays_to_ignore_v)
{
if (!is_char<'*'>(*std::ranges::begin(stream_view))) // no cigar information given
{
- std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view);
- transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
- // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying
+ int32_t ref_length{}, seq_length{}; // length of aligned part for ref and query
+ std::tie(cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view);
+ int32_t soft_clipping_end{};
+ int32_t offset_tmp{};
+ transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end);
+ offset = offset_tmp;
}
else
{
@@ -482,8 +462,6 @@ format_sam::read_alignment_record(stream_type & stream,
detail::consume(field_view);
}
- offset = offset_tmp;
-
// Field 7-9: (RNEXT PNEXT TLEN) = MATE
// -------------------------------------------------------------------------------------------------------------
if constexpr (!detail::decays_to_ignore_v)
@@ -541,52 +519,11 @@ format_sam::read_alignment_record(stream_type & stream,
if constexpr (detail::decays_to_ignore_v)
{
- if constexpr (!detail::decays_to_ignore_v)
- {
- static_assert(sequence_container(align))>>,
- "If you want to read ALIGNMENT but not SEQ, the alignment"
- " object must store a sequence container at the second (query) position.");
-
- if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given
- {
-
- auto tmp_iter = std::ranges::begin(seq_stream);
- std::ranges::advance(tmp_iter, offset_tmp);
-
- for (; seq_length > 0; --seq_length) // seq_length is not needed anymore
- {
- get<1>(align).push_back(
- std::ranges::range_value_t(align))>{}.assign_char(*tmp_iter));
- ++tmp_iter;
- }
-
- std::ranges::advance(tmp_iter, soft_clipping_end);
- }
- else
- {
- get<1>(align) = std::remove_reference_t(align))>{}; // empty container
- }
- }
- else
- {
- detail::consume(seq_stream);
- }
+ detail::consume(seq_stream);
}
else
{
read_forward_range_field(seq_stream, seq);
-
- if constexpr (!detail::decays_to_ignore_v)
- {
- if (!tmp_cigar_vector
- .empty()) // if no alignment info is given, the field::alignment should remain empty
- {
- assign_unaligned(get<1>(align),
- seq
- | views::slice(static_cast(offset_tmp),
- std::ranges::size(seq) - soft_clipping_end));
- }
- }
}
}
else
@@ -629,29 +566,6 @@ format_sam::read_alignment_record(stream_type & stream,
}
detail::consume(stream_view | detail::take_until(!(is_char<'\r'> || is_char<'\n'>))); // consume new line
-
- // DONE READING - wrap up
- // -------------------------------------------------------------------------------------------------------------
- // Alignment object construction
- // Note that the query sequence in get<1>(align) has already been filled while reading Field 10.
- if constexpr (!detail::decays_to_ignore_v)
- {
- int32_t ref_idx{(ref_id_tmp.empty() /*unmapped read?*/) ? -1 : 0};
-
- if constexpr (!detail::decays_to_ignore_v)
- {
- if (!ref_id_tmp.empty())
- {
- assert(header.ref_dict.count(ref_id_tmp) != 0); // taken care of in check_and_assign_ref_id()
- ref_idx = header.ref_dict[ref_id_tmp]; // get index for reference sequence
- }
- }
-
- construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
- }
-
- if constexpr (!detail::decays_to_ignore_v)
- std::swap(cigar_vector, tmp_cigar_vector);
}
//!\copydoc sam_file_output_format::write_alignment_record
@@ -661,7 +575,6 @@ template ref_offset,
- align_type && align,
std::vector const & cigar_vector,
sam_flag const flag,
uint8_t const mapq,
@@ -723,16 +635,6 @@ inline void format_sam::write_alignment_record(stream_type & stream,
"If you give indices as reference id information the header must also be present.");
}
- static_assert(tuple_like>,
- "The align object must be a std::pair of two ranges whose "
- "value_type is comparable to seqan3::gap");
-
- static_assert((std::tuple_size_v> == 2
- && std::equality_comparable_with(align))>>
- && std::equality_comparable_with(align))>>),
- "The align object must be a std::pair of two ranges whose "
- "value_type is comparable to seqan3::gap");
-
static_assert((std::ranges::forward_range && alphabet>),
"The qual object must be a std::ranges::forward_range "
"over letters that model seqan3::alphabet.");
@@ -867,23 +769,6 @@ inline void format_sam::write_alignment_record(stream_type & stream,
for (auto & c : cigar_vector) //TODO THIS IS PROBABLY TERRIBLE PERFORMANCE_WISE
stream_it.write_range(c.to_string());
}
- else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
- {
- // compute possible distance from alignment end to sequence end
- // which indicates soft clipping at the end.
- // This should be replace by a free count_gaps function for
- // aligned sequences which is more efficient if possible.
- size_t off_end{std::ranges::size(seq) - offset};
- for (auto chr : get<1>(align))
- if (chr == gap{})
- ++off_end;
-
- // Might happen if get<1>(align) doesn't correspond to the reference.
- assert(off_end >= std::ranges::size(get<1>(align)));
- off_end -= std::ranges::size(get<1>(align));
-
- write_range_or_asterisk(stream_it, detail::get_cigar_string(align, offset, off_end));
- }
else
{
*stream_it = '*';
diff --git a/include/seqan3/io/sam_file/header.hpp b/include/seqan3/io/sam_file/header.hpp
index 731df0c1a6..b98af2ea8d 100644
--- a/include/seqan3/io/sam_file/header.hpp
+++ b/include/seqan3/io/sam_file/header.hpp
@@ -24,7 +24,7 @@
namespace seqan3
{
-/*!\brief Stores the header information of alignment files.
+/*!\brief Stores the header information of SAM/BAM files.
* \ingroup io_sam_file
*
* \remark For a complete overview, take a look at \ref io_sam_file
@@ -168,7 +168,7 @@ class sam_file_header
* primary assembly.
* * **AN:** Alternative reference sequence names. A comma-separated list of alternative names that tools may use
* when referring to this reference sequence. These alternative names are not used elsewhere within the
- * SAM file; in particular, they must not appear in alignment records’ RNAME or RNEXT fields. regular
+ * SAM file; in particular, they must not appear in SAM records’ RNAME or RNEXT fields. regular
* expression : name (, name )* where name is [0-9A-Za-z][0-9A-Za-z*+.@ \|-]*
* * **AS:** Genome assembly identifier.
* * **M5:** MD5 checksum of the sequence. See Section 1.3.1
diff --git a/include/seqan3/io/sam_file/input.hpp b/include/seqan3/io/sam_file/input.hpp
index b828a8bd64..c36ae82926 100644
--- a/include/seqan3/io/sam_file/input.hpp
+++ b/include/seqan3/io/sam_file/input.hpp
@@ -21,7 +21,6 @@
#include
#include
-#include
#include
#include
#include
@@ -149,16 +148,6 @@ concept sam_file_input_traits =
// field::evalue is fixed to double
// field::bitscore is fixed to double
// field::mate is fixed to std::tuple, ref_offset_type, int32_t>
-
- // field::alignment
- // the alignment type cannot be configured.
- // Type of tuple entry 1 (reference) is set to
- // 1) a std::ranges::subrange over std::ranges::range_value_t if reference information was given
- // or 2) a "dummy" sequence type:
- // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
- // Type of tuple entry 2 (query) is set to
- // 1) a std::ranges::subrange over std::ranges::range_value_t if reference information was given
- // or 2) a "dummy" sequence type:
};
//!\endcond
@@ -222,7 +211,7 @@ struct sam_file_input_default_traits
// sam_file_input
// ---------------------------------------------------------------------------------------------------------------------
-/*!\brief A class for reading alignment files, e.g. SAM, BAM, BLAST ...
+/*!\brief A class for reading SAM files, both SAM and its binary representation BAM are supported.
* \ingroup io_sam_file
* \tparam traits_type An auxiliary type that defines certain member types and constants, must model
* seqan3::sam_file_input_traits.
@@ -243,7 +232,6 @@ template ,
field::offset,
field::ref_id,
field::ref_offset,
- field::alignment,
field::cigar,
field::mapq,
field::qual,
@@ -332,24 +320,12 @@ class sam_file_input
//!\brief The type of field::header_ptr (default: sam_file_header).
using header_type = sam_file_header;
-private:
- //!\brief The type of the aligned query sequence (second type of the pair of alignment_type).
- using alignment_query_type = std::conditional_t<
- selected_field_ids::contains(field::seq),
- gap_decorator() | views::slice(0, 0))>,
- typename traits_type::template sequence_container>>;
-
-public:
- //!\brief The type of field::alignment (default: std::pair>, std::vector>>).
- using alignment_type = std::tuple, alignment_query_type>;
-
//!\brief The previously defined types aggregated in a seqan3::type_list.
using field_types = type_list,
mapq_type,
quality_type,
@@ -360,20 +336,19 @@ class sam_file_input
/*!\brief The subset of seqan3::field tags valid for this file; order corresponds to the types in \ref field_types.
*
- * The SAM file abstraction supports reading 12 different fields:
+ * The SAM file abstraction supports reading 11 different fields:
*
* 1. seqan3::field::seq
* 2. seqan3::field::id
* 3. seqan3::field::offset
* 4. seqan3::field::ref_id
* 5. seqan3::field::ref_offset
- * 6. seqan3::field::alignment
- * 7. seqan3::field::cigar
- * 8. seqan3::field::mapq
- * 9. seqan3::field::qual
- * 10. seqan3::field::flag
- * 11. seqan3::field::mate
- * 12. seqan3::field::tags
+ * 6. seqan3::field::cigar
+ * 7. seqan3::field::mapq
+ * 8. seqan3::field::qual
+ * 9. seqan3::field::flag
+ * 10. seqan3::field::mate
+ * 11. seqan3::field::tags
*
* There exists one more field for SAM files, the seqan3::field::header_ptr, but this field is mostly used
* internally. Please see the seqan3::sam_file_output::header member function for details on how to access
@@ -384,7 +359,6 @@ class sam_file_input
field::offset,
field::ref_id,
field::ref_offset,
- field::alignment,
field::cigar,
field::mapq,
field::qual,
@@ -393,14 +367,19 @@ class sam_file_input
field::tags,
field::header_ptr>;
+ static_assert(!selected_field_ids::contains(field::alignment),
+ "The field::alignment is deprecated and only field::cigar is supported. Please see "
+ "seqan3::alignment_from_cigar on how to get an alignment from the cigar information.");
+
static_assert(
- []() constexpr {
+ []() constexpr
+ {
for (field f : selected_field_ids::as_array)
if (!field_ids::contains(f))
return false;
return true;
}(),
- "You selected a field that is not valid for alignment files, please refer to the documentation "
+ "You selected a field that is not valid for SAM files, please refer to the documentation "
"of sam_file_input::field_ids for the accepted values.");
//!\brief The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field types.
@@ -519,9 +498,14 @@ class sam_file_input
*
* \details
*
- * The reference information given by the ids (names) and sequences will be used to construct a proper alignment
- * when reading in SAM or BAM files. If you are not interested in the full alignment, call the constructor without
- * the parameters.
+ * ### Reference information
+ *
+ * The reference information given by the IDs (names) and sequences will be used to keep the record entry
+ * `seqan3::sam_file_input::record_type::reference_id()` consistent with the order imposed by `ref_ids`.
+ * This way, you can use the value of `seqan3::sam_file_input::record_type::reference_id()` to access the lists
+ * `ref_ids` and `ref_sequences` to retrieve the correct information for the current record.
+ *
+ * ### Selecting custom fields
*
* In addition to the file name and reference information, you may specify a custom seqan3::fields object
* (e.g. `seqan3::fields{}`) which may be easier than
@@ -556,9 +540,14 @@ class sam_file_input
*
* \details
*
- * The reference information given by the ids (names) and sequences will be used to construct a proper alignment
- * when reading in SAM or BAM files. If you are not interested in the full alignment, you do not need to specify
- * those information.
+ * ### Reference information
+ *
+ * The reference information given by the IDs (names) and sequences will be used to keep the record entry
+ * `seqan3::sam_file_input::record_type::reference_id()` consistent with the order imposed by `ref_ids`.
+ * This way, you can use the value of `seqan3::sam_file_input::record_type::reference_id()` to access the lists
+ * `ref_ids` and `ref_sequences` to retrieve the correct information for the current record.
+ *
+ * ### Selecting custom fields
*
* In addition to the stream, reference information and format, you may specify a custom seqan3::fields object
* (e.g. `seqan3::fields{}`) which may be easier than
@@ -805,11 +794,13 @@ class sam_file_input
*
* \details
*
- * The SAM format only provides semi-alignments because the reference sequence
- * is not stored explicitly. In order to be able to read in full alignments,
- * additional reference information can be given to the alignment file on construction.
+ * The reference information given by the IDs (`ref_ids`) and sequences (`ref_sequences`) will be used to keep the
+ * record entry `seqan3::sam_file_input::record_type::reference_id()` consistent with the order imposed by
+ * `ref_ids`. This way, you can use the value of `seqan3::sam_file_input::record_type::reference_id()` to access
+ * the lists `ref_ids` and `ref_sequences` to retrieve the correct information for the current record.
+ *
* Note that the reference ids (names) must correspond to the exact spelling
- * in the SAM/BAM file otherwise an exception will be thrown when reading.
+ * in the SAM/BAM file. Otherwise, an exception will be thrown when reading.
*/
template
void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
@@ -871,7 +862,6 @@ class sam_file_input
detail::get_or_ignore(record_buffer),
detail::get_or_ignore(record_buffer),
detail::get_or_ignore(record_buffer),
- detail::get_or_ignore(record_buffer),
detail::get_or_ignore(record_buffer),
detail::get_or_ignore(record_buffer),
detail::get_or_ignore(record_buffer),
diff --git a/include/seqan3/io/sam_file/input_format_concept.hpp b/include/seqan3/io/sam_file/input_format_concept.hpp
index e32407dfa8..b19fec3212 100644
--- a/include/seqan3/io/sam_file/input_format_concept.hpp
+++ b/include/seqan3/io/sam_file/input_format_concept.hpp
@@ -32,7 +32,7 @@
namespace seqan3::detail
{
-/*!\brief Internal class used to expose the actual format interface to read alignment records from the file.
+/*!\brief Internal class used to expose the actual format interface to read SAM records from the file.
* \ingroup io_sam_file
*
* \tparam format_type The type of the format to be exposed.
@@ -88,7 +88,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer<
dna5_vector & ref_seq,
std::optional & ref_id,
std::optional & ref_offset,
- std::pair>, std::vector>> & align,
std::vector & cigar,
sam_flag & flag,
uint8_t & mapq,
@@ -112,7 +111,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer<
ref_seq,
ref_id,
ref_offset,
- align,
cigar,
flag,
mapq,
@@ -141,7 +139,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer<
std::ignore,
std::ignore,
std::ignore,
- std::ignore,
std::ignore)
};
};
@@ -172,7 +169,6 @@ class sam_file_input_format
* ref_seq_type & ref_seq,
* ref_id_type & ref_id,
* ref_offset_type & ref_offset,
- * align_type & align,
* cigar_type & cigar_vector,
* flag_type & flag,
* mapq_type & mapq,
@@ -191,7 +187,6 @@ class sam_file_input_format
* \tparam ref_seq_type Type of the seqan3::field::ref_seq input (see seqan3::sam_file_input_traits).
* \tparam ref_id_type Type of the seqan3::field::ref_id input (see seqan3::sam_file_input_traits).
* \tparam ref_offset_type Type of the seqan3::field::ref_offset input (see seqan3::sam_file_input_traits).
- * \tparam align_type Type of the seqan3::field::alignment input (see seqan3::sam_file_input_traits).
* \tparam cigar_type Type of the seqan3::field::cigar input (a std::vector or std::ignore).
* \tparam flag_type Type of the seqan3::field::flag input (see seqan3::sam_file_input_traits).
* \tparam mapq_type Type of the seqan3::field::mapq input (see seqan3::sam_file_input_traits).
@@ -212,7 +207,6 @@ class sam_file_input_format
* \param[out] ref_seq The buffer for seqan3::field::ref_seq input.
* \param[out] ref_id The buffer for seqan3::field::ref_id input.
* \param[out] ref_offset The buffer for seqan3::field::ref_offset input.
- * \param[out] align The buffer for seqan3::field::alignment input.
* \param[out] cigar_vector The buffer for seqan3::field::cigar input.
* \param[out] flag The buffer for seqan3::field::flag input.
* \param[out] mapq The buffer for seqan3::field::mapq input.
diff --git a/include/seqan3/io/sam_file/output.hpp b/include/seqan3/io/sam_file/output.hpp
index 95e53a6c9e..521c0f07e9 100644
--- a/include/seqan3/io/sam_file/output.hpp
+++ b/include/seqan3/io/sam_file/output.hpp
@@ -44,7 +44,7 @@ namespace seqan3
// sam_file_output
// ----------------------------------------------------------------------------
-/*!\brief A class for writing alignment files, e.g. SAM, BAL, BLAST, ...
+/*!\brief A class for writing SAM files, both SAM and its binary representation BAM are supported.
* \ingroup io_sam_file
* \tparam selected_field_ids A seqan3::fields type with the list and order of
* fields IDs; only relevant if these can't be deduced.
@@ -62,7 +62,6 @@ template (r, std::string_view{}),
detail::get_or(r, std::ignore),
detail::get_or(r, std::optional{}),
- detail::get_or(r, default_align_t{}),
detail::get_or(r, std::vector{}),
detail::get_or(r, sam_flag::none),
detail::get_or(r, 0u),
@@ -460,7 +457,6 @@ class sam_file_output
detail::get_or(t, std::string_view{}),
detail::get_or(t, std::ignore),
detail::get_or(t, std::optional{}),
- detail::get_or(t, default_align_t{}),
detail::get_or(t, std::vector{}),
detail::get_or(t, sam_flag::none),
detail::get_or(t, 0u),
@@ -679,7 +675,7 @@ class sam_file_output
template
void write_record(record_header_ptr_t && record_header_ptr, pack_type &&... remainder)
{
- static_assert((sizeof...(pack_type) == 15), "Wrong parameter list passed to write_record.");
+ static_assert((sizeof...(pack_type) == 14), "Wrong parameter list passed to write_record.");
assert(!format.valueless_by_exception());
diff --git a/include/seqan3/io/sam_file/output_format_concept.hpp b/include/seqan3/io/sam_file/output_format_concept.hpp
index e3c4b21c4d..527a8c6ce9 100644
--- a/include/seqan3/io/sam_file/output_format_concept.hpp
+++ b/include/seqan3/io/sam_file/output_format_concept.hpp
@@ -18,7 +18,6 @@
#include
#include
-#include
#include
#include
#include
@@ -31,7 +30,7 @@
namespace seqan3::detail
{
-/*!\brief Internal class used to expose the actual format interface to write alignment records into the file.
+/*!\brief Internal class used to expose the actual format interface to write SAM/BAM records into the file.
* \ingroup io_sam_file
*
* \tparam format_type The type of the format to be exposed.
@@ -93,7 +92,6 @@ concept sam_file_output_format = requires (detail::sam_file_output_format_expose
dna5_vector & ref_seq,
std::optional & ref_id,
std::optional & ref_offset,
- std::pair>, std::vector>> & align,
std::vector & cigar,
sam_flag & flag,
uint8_t & mapq,
@@ -114,7 +112,6 @@ concept sam_file_output_format = requires (detail::sam_file_output_format_expose
ref_seq,
ref_id,
ref_offset,
- align,
cigar,
flag,
mapq,
@@ -149,7 +146,6 @@ class sam_file_output_format
ref_seq_type && ref_seq,
ref_id_type && ref_id,
ref_offset_type && ref_offset,
- align_type && align,
std::vector & cigar_vector,
flag_type && flag,
mapq_type && mapq,
@@ -165,7 +161,6 @@ class sam_file_output_format
* \tparam ref_seq_type Type of the seqan3
* \tparam ref_id_type Type of the seqan3
* \tparam ref_offset_type Type of the seqan3
- * \tparam align_type Type of the seqan3
* \tparam flag_type Type of the seqan3
* \tparam mapq_type Type of the seqan3
* \tparam qual_type Type of the seqan3
@@ -184,7 +179,6 @@ class sam_file_output_format
* \param[in] ref_seq The data for seqan3::field::ref_offset, i.e. the reference sequence.
* \param[in] ref_id The data for seqan3::field::ref_id, e.g. the reference id..
* \param[in] ref_offset The data for seqan3::field::ref_offset, i.e. the start position of the alignment in \p ref_seq.
- * \param[in] align The data for seqan3::field::align, e.g. the alignment between query and ref.
* \param[in] cigar_vector The data for seqan3::field::cigar, e.g. representing the alignment between query and ref.
* \param[in] flag The data for seqan3::field::flag, e.g. the SAM mapping flag value.
* \param[in] mapq The data for seqan3::field::mapq, e.g. the mapping quality value.
diff --git a/include/seqan3/io/sam_file/record.hpp b/include/seqan3/io/sam_file/record.hpp
index de642c6108..22f87e98c9 100644
--- a/include/seqan3/io/sam_file/record.hpp
+++ b/include/seqan3/io/sam_file/record.hpp
@@ -152,28 +152,21 @@ class sam_record : public record
return get_impl(field_constant{}, static_cast(*this));
}
- /*!\brief The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
- * \returns seqan3::sam_file_input::alignment_type per default
+ /*!\brief [DEPRECATED] The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
+ *
+ *\deprecated Please access `cigar()` and then use `seqan3::alignment_from_cigar` to retrieve the alignment.
*/
- decltype(auto) alignment() &&
- {
- return get_impl(field_constant{}, static_cast(*this));
- }
+ SEQAN3_DEPRECATED_340 decltype(auto) alignment() &&
+ {}
//!\copydoc seqan3::sam_record::alignment
- decltype(auto) alignment() const &&
- {
- return get_impl(field_constant{}, static_cast(*this));
- }
+ SEQAN3_DEPRECATED_340 decltype(auto) alignment() const &&
+ {}
//!\copydoc seqan3::sam_record::alignment
- decltype(auto) alignment() &
- {
- return get_impl(field_constant{}, static_cast(*this));
- }
+ SEQAN3_DEPRECATED_340 decltype(auto) alignment() &
+ {}
//!\copydoc seqan3::sam_record::alignment
- decltype(auto) alignment() const &
- {
- return get_impl(field_constant{}, static_cast(*this));
- }
+ SEQAN3_DEPRECATED_340 decltype(auto) alignment() const &
+ {}
/*!\brief The identifier of the (reference) sequence that seqan3::sam_record::sequence was aligned to.
* (SAM Column ID: RNAME)
diff --git a/include/seqan3/io/sam_file/sam_flag.hpp b/include/seqan3/io/sam_file/sam_flag.hpp
index 0af698bbcf..acc36f272f 100644
--- a/include/seqan3/io/sam_file/sam_flag.hpp
+++ b/include/seqan3/io/sam_file/sam_flag.hpp
@@ -18,7 +18,7 @@
namespace seqan3
{
-//!\brief Type tag which indicates that no reference information has been passed to the alignment file on construction.
+//!\brief Type tag which indicates that no reference information has been passed to the SAM file on construction.
//!\ingroup io_sam_file
struct ref_info_not_given
{};
diff --git a/test/performance/io/format_sam_benchmark.cpp b/test/performance/io/format_sam_benchmark.cpp
index 3a3024cbe3..642b06c1d6 100644
--- a/test/performance/io/format_sam_benchmark.cpp
+++ b/test/performance/io/format_sam_benchmark.cpp
@@ -7,6 +7,7 @@
#include
+#include
#include
#include
#include
@@ -55,7 +56,7 @@ static std::string create_sam_file_string(size_t const n_queries)
seqan3::field::offset,
seqan3::field::ref_id,
seqan3::field::ref_offset,
- seqan3::field::alignment,
+ seqan3::field::cigar,
seqan3::field::mapq,
seqan3::field::qual,
seqan3::field::flag>;
@@ -69,15 +70,15 @@ static std::string create_sam_file_string(size_t const n_queries)
auto align_result = *(seqan3::align_pairwise(std::tie(reference, query), config).begin());
std::string const current_query_id = query_prefix + std::to_string(i);
- sam_out.emplace_back(query, // field::seq
- current_query_id, // field::id
- align_result.sequence2_begin_position(), // field::offset
- reference_id, // field::ref_id
- align_result.sequence1_begin_position(), // field::ref_offset
- align_result.alignment(), // field::alignment
- align_result.score(), // field::mapq
- qualities, // field::qual
- seqan3::sam_flag::none); // field::flag
+ sam_out.emplace_back(query, // field::seq
+ current_query_id, // field::id
+ align_result.sequence2_begin_position(), // field::offset
+ reference_id, // field::ref_id
+ align_result.sequence1_begin_position(), // field::ref_offset
+ seqan3::cigar_from_alignment(align_result.alignment()), // field::cigar
+ align_result.score(), // field::mapq
+ qualities, // field::qual
+ seqan3::sam_flag::none); // field::flag
}
file_dict[n_queries] = stream.str();
diff --git a/test/snippet/io/sam_file/get_cigar_vector.cpp b/test/snippet/io/sam_file/get_cigar_vector.cpp
deleted file mode 100644
index 97cf5d0fc6..0000000000
--- a/test/snippet/io/sam_file/get_cigar_vector.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include
-#include
-#include
-#include
-
-int main()
-{
- using aligned_t = std::vector>;
- using namespace seqan3::literals;
-
- aligned_t ref{'A'_dna4,
- 'T'_dna4,
- 'G'_dna4,
- 'G'_dna4,
- seqan3::gap{},
- seqan3::gap{},
- 'C'_dna4,
- 'G'_dna4,
- 'T'_dna4,
- 'A'_dna4,
- 'G'_dna4,
- 'A'_dna4,
- 'G'_dna4,
- 'C'_dna4};
-
- aligned_t query{'A'_dna4,
- 'T'_dna4,
- 'G'_dna4,
- 'C'_dna4,
- 'C'_dna4,
- 'C'_dna4,
- 'C'_dna4,
- 'G'_dna4,
- 'T'_dna4,
- 'T'_dna4,
- 'G'_dna4,
- seqan3::gap{},
- seqan3::gap{},
- 'C'_dna4};
-
- seqan3::debug_stream << seqan3::detail::get_cigar_vector(std::tie(ref, query)) << '\n';
-}
diff --git a/test/snippet/io/sam_file/get_cigar_vector.err b/test/snippet/io/sam_file/get_cigar_vector.err
deleted file mode 100644
index 0259163989..0000000000
--- a/test/snippet/io/sam_file/get_cigar_vector.err
+++ /dev/null
@@ -1 +0,0 @@
-[4M,2I,5M,2D,1M]
diff --git a/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp b/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp
index c380da65f0..e9908f3ecd 100644
--- a/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp
+++ b/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp
@@ -14,7 +14,6 @@ int main()
seqan3::field::offset,
seqan3::field::ref_id,
seqan3::field::ref_offset,
- seqan3::field::alignment,
seqan3::field::cigar,
seqan3::field::mapq,
seqan3::field::qual,
diff --git a/test/unit/alignment/aligned_sequence_test_template.hpp b/test/unit/alignment/aligned_sequence_test_template.hpp
index b86b0350a6..d5170a88e1 100644
--- a/test/unit/alignment/aligned_sequence_test_template.hpp
+++ b/test/unit/alignment/aligned_sequence_test_template.hpp
@@ -11,6 +11,7 @@
#include
#include
+#include
#include
#include
#include
@@ -261,9 +262,8 @@ TYPED_TEST_P(aligned_sequence, cigar_string)
seqan3::insert_gap(ref, std::ranges::next(begin(ref), 7), 2);
seqan3::insert_gap(read, std::ranges::next(begin(read), 4), 1);
- std::string expected = "4M1D2M2I3M";
-
- EXPECT_EQ(expected, seqan3::detail::get_cigar_string(std::make_pair(ref, read)));
+ auto cigar = seqan3::cigar_from_alignment(std::make_pair(ref, read));
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar), "4M1D2M2I3M");
TypeParam ref2;
TypeParam read2;
@@ -276,9 +276,8 @@ TYPED_TEST_P(aligned_sequence, cigar_string)
insert_gap(read2, std::ranges::next(begin(read2), 4), 1);
insert_gap(read2, std::ranges::begin(read2), 1);
- std::string expected2 = "1P2I2M1D4M2I1M2D2P";
-
- EXPECT_EQ(expected2, seqan3::detail::get_cigar_string(std::make_pair(ref2, read2)));
+ auto cigar2 = seqan3::cigar_from_alignment(std::make_pair(ref2, read2));
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar2), "1P2I2M1D4M2I1M2D2P");
}
{
// with soft clipping
@@ -291,9 +290,8 @@ TYPED_TEST_P(aligned_sequence, cigar_string)
insert_gap(ref, std::ranges::next(std::ranges::begin(ref), 7), 2);
insert_gap(read, std::ranges::next(std::ranges::begin(read), 4), 1);
- std::string expected = "5S4M1D2M2I3M60S";
-
- EXPECT_EQ(expected, seqan3::detail::get_cigar_string(std::make_pair(ref, read), 5, 60));
+ auto cigar = seqan3::cigar_from_alignment(std::make_pair(ref, read), {.soft_front = 5, .soft_back = 60});
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar), "5S4M1D2M2I3M60S");
// gaps at the end
TypeParam ref2;
@@ -307,9 +305,8 @@ TYPED_TEST_P(aligned_sequence, cigar_string)
seqan3::insert_gap(read2, std::ranges::next(begin(read2), 4), 1);
seqan3::insert_gap(read2, std::ranges::begin(read2), 1);
- std::string expected2 = "3S1P2I2M1D4M2I1M2D2P5S";
-
- EXPECT_EQ(expected2, seqan3::detail::get_cigar_string(std::make_pair(ref2, read2), 3, 5));
+ auto cigar2 = seqan3::cigar_from_alignment(std::make_pair(ref2, read2), {.soft_front = 3, .soft_back = 5});
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar2), "3S1P2I2M1D4M2I1M2D2P5S");
}
{
// no gaps at the end
@@ -322,11 +319,10 @@ TYPED_TEST_P(aligned_sequence, cigar_string)
seqan3::insert_gap(ref, std::ranges::next(std::ranges::begin(ref), 7), 2);
seqan3::insert_gap(read, std::ranges::next(std::ranges::begin(read), 4), 1);
- std::string expected1 = "4=1D2X2I1=1X1=";
- std::string expected2 = "5S4=1D2X2I1=1X1=60S";
-
- EXPECT_EQ(expected1, seqan3::detail::get_cigar_string(std::make_pair(ref, read), 0, 0, true));
- EXPECT_EQ(expected2, seqan3::detail::get_cigar_string(std::make_pair(ref, read), 5, 60, true));
+ auto cigar = seqan3::cigar_from_alignment(std::make_pair(ref, read), {}, true);
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar), "4=1D2X2I1=1X1=");
+ auto cigar2 = seqan3::cigar_from_alignment(std::make_pair(ref, read), {.soft_front = 5, .soft_back = 60}, true);
+ EXPECT_EQ(seqan3::detail::get_cigar_string(cigar2), "5S4=1D2X2I1=1X1=60S");
}
}
diff --git a/test/unit/alignment/cigar_conversion/alignment_from_cigar_test.cpp b/test/unit/alignment/cigar_conversion/alignment_from_cigar_test.cpp
index 8f792c590b..a775a51e1e 100644
--- a/test/unit/alignment/cigar_conversion/alignment_from_cigar_test.cpp
+++ b/test/unit/alignment/cigar_conversion/alignment_from_cigar_test.cpp
@@ -158,3 +158,24 @@ TEST_F(alignment_from_cigar, with_hard_clipping)
EXPECT_RANGE_EQ(std::get<0>(alignment), this->cigar_with_hard_clipping_gapped_ref);
EXPECT_RANGE_EQ(std::get<1>(alignment), this->cigar_with_hard_clipping_gapped_seq);
}
+
+TEST_F(alignment_from_cigar, short_cigar_string_with_softclipping)
+{
+ seqan3::dna5_vector seq = "AGAGGGGGATAACCA"_dna5;
+
+ { // soft clipping at front
+ std::vector short_cigar{{5, 'S'_cigar_operation}, {10, 'M'_cigar_operation}}; // 5S 10M
+
+ auto alignment = seqan3::alignment_from_cigar(short_cigar, this->ref, 0, seq);
+
+ EXPECT_RANGE_EQ(std::get<1>(alignment), "GGGATAACCA"_dna5);
+ }
+
+ { // soft clipping at back
+ std::vector short_cigar{{10, 'M'_cigar_operation}, {5, 'S'_cigar_operation}}; // 10M 5S
+
+ auto alignment = seqan3::alignment_from_cigar(short_cigar, this->ref, 0, seq);
+
+ EXPECT_RANGE_EQ(std::get<1>(alignment), "AGAGGGGGAT"_dna5);
+ }
+}
diff --git a/test/unit/io/sam_file/format_bam_test.cpp b/test/unit/io/sam_file/format_bam_test.cpp
index 378887bae5..eedd8da0c2 100644
--- a/test/unit/io/sam_file/format_bam_test.cpp
+++ b/test/unit/io/sam_file/format_bam_test.cpp
@@ -308,72 +308,48 @@ struct sam_file_read : public sam_file_data
'\x02', '\x02', '\x03', '\x61', '\x61', '\x41', '\x63', '\x41', '\x53', '\x43', '\x02', '\x66', '\x66',
'\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00', '\x0A'};
- std::string simple_three_reads_output{
- // no hard clipping in output
- '\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
- '\x3A', '\x31', '\x2E', '\x36', '\x0A', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4E', '\x3A', '\x72', '\x65',
- '\x66', '\x09', '\x4C', '\x4E', '\x3A', '\x33', '\x34', '\x0A', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00',
- '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x48', '\x00', '\x00', '\x00',
- '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3D', '\x49', '\x12', '\x05', '\x00',
- '\x29', '\x00', '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00',
- '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00',
- '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00',
- '\x00', '\x00', '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x4E', '\x4D',
- '\x43', '\x07', '\x52', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00',
- '\x06', '\x3E', '\x49', '\x12', '\x04', '\x00', '\x2A', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00',
- '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64',
- '\x32', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00',
- '\x14', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xF1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05',
- '\x06', '\x07', '\x08', '\x09', '\x78', '\x79', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x03', '\x00',
- '\x04', '\x00', '\x05', '\x00', '\x5A', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00',
- '\x00', '\x00', '\x06', '\x3F', '\x49', '\x12', '\x0A', '\x00', '\x2B', '\x00', '\x08', '\x00', '\x00', '\x00',
- '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65',
- '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00',
- '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00',
- '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00',
- '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E'};
-
std::string verbose_output{
- '\x42', '\x41', '\x4D', '\x01', '\xA6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
- '\x3A', '\x31', '\x2E', '\x36', '\x09', '\x53', '\x4F', '\x3A', '\x75', '\x6E', '\x6B', '\x6E', '\x6F', '\x77',
- '\x6E', '\x09', '\x47', '\x4F', '\x3A', '\x6E', '\x6F', '\x6E', '\x65', '\x0A', '\x40', '\x53', '\x51', '\x09',
- '\x53', '\x4E', '\x3A', '\x72', '\x65', '\x66', '\x09', '\x4C', '\x4E', '\x3A', '\x33', '\x34', '\x09', '\x41',
- '\x4E', '\x3A', '\x6F', '\x74', '\x68', '\x65', '\x72', '\x5F', '\x6E', '\x61', '\x6D', '\x65', '\x0A', '\x40',
- '\x52', '\x47', '\x09', '\x49', '\x44', '\x3A', '\x67', '\x72', '\x6F', '\x75', '\x70', '\x31', '\x09', '\x44',
- '\x53', '\x3A', '\x6D', '\x6F', '\x72', '\x65', '\x20', '\x69', '\x6E', '\x66', '\x6F', '\x0A', '\x40', '\x50',
- '\x47', '\x09', '\x49', '\x44', '\x3A', '\x70', '\x72', '\x6F', '\x67', '\x31', '\x09', '\x50', '\x4E', '\x3A',
- '\x63', '\x6F', '\x6F', '\x6C', '\x5F', '\x70', '\x72', '\x6F', '\x67', '\x72', '\x61', '\x6D', '\x09', '\x43',
- '\x4C', '\x3A', '\x2E', '\x2F', '\x70', '\x72', '\x6F', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3A', '\x61',
- '\x09', '\x44', '\x53', '\x3A', '\x62', '\x09', '\x56', '\x4E', '\x3A', '\x63', '\x0A', '\x40', '\x43', '\x4F',
- '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6F', '\x6D',
- '\x6D', '\x65', '\x6E', '\x74', '\x2E', '\x0A', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
+ '\x42', '\x41', '\x4d', '\x01', '\xa6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e',
+ '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x53', '\x4f', '\x3a', '\x75', '\x6e', '\x6b', '\x6e', '\x6f', '\x77',
+ '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x0a', '\x40', '\x53', '\x51', '\x09',
+ '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x41',
+ '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x0a', '\x40',
+ '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', '\x09', '\x44',
+ '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x0a', '\x40', '\x50',
+ '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x4e', '\x3a',
+ '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', '\x6d', '\x09', '\x43',
+ '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3a', '\x61',
+ '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x0a', '\x40', '\x43', '\x4f',
+ '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6f', '\x6d',
+ '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', '\x00', '\x00', '\x00', '\x00',
- '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3D', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00',
- '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01',
+ '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00',
+ '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01',
'\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00',
'\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00',
- '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2C',
- '\x01', '\x4E', '\x4D', '\x63', '\xF9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xD4', '\xFE',
- '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00',
- '\xA7', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3E',
- '\x49', '\x12', '\x04', '\x00', '\x2A', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
- '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00',
- '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00',
- '\x00', '\x00', '\x14', '\x42', '\x84', '\xF1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07',
- '\x08', '\x09', '\x62', '\x43', '\x42', '\x43', '\x02', '\x00', '\x00', '\x00', '\x03', '\xC8', '\x62', '\x49',
- '\x42', '\x49', '\x01', '\x00', '\x00', '\x00', '\x00', '\xD8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53',
- '\x03', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x28', '\x00', '\xF4', '\x01', '\x62', '\x63', '\x42', '\x63',
- '\x01', '\x00', '\x00', '\x00', '\xFD', '\x62', '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00',
- '\x00', '\x60', '\x40', '\xCD', '\xCC', '\xCC', '\x3D', '\x33', '\x33', '\x2F', '\x42', '\x62', '\x69', '\x42',
- '\x69', '\x03', '\x00', '\x00', '\x00', '\xFD', '\xFF', '\xFF', '\xFF', '\xC8', '\x00', '\x00', '\x00', '\x30',
- '\xFE', '\xFE', '\xFF', '\x62', '\x73', '\x42', '\x73', '\x03', '\x00', '\x00', '\x00', '\xFD', '\xFF', '\xC8',
- '\x00', '\xD4', '\xFE', '\x5A', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00',
- '\x00', '\x06', '\x3F', '\x49', '\x12', '\x0A', '\x00', '\x2B', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00',
- '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61',
- '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00',
- '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11',
- '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00',
- '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E'};
+ '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2c',
+ '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xd4', '\xfe',
+ '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', '\x73', '\x74', '\x72', '\x00',
+ '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3e',
+ '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
+ '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00',
+ '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00',
+ '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xf1',
+ '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', '\x62', '\x43', '\x42', '\x43',
+ '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', '\x01', '\x00', '\x00', '\x00',
+ '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x2c', '\x01',
+ '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', '\x00', '\x00', '\xfd', '\x62',
+ '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', '\x40', '\xcd', '\xcc', '\xcc',
+ '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', '\x00', '\x00', '\x00', '\xfd',
+ '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', '\xff', '\x62', '\x73', '\x42',
+ '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', '\xfe', '\x5a', '\x00', '\x00',
+ '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', '\x3f', '\x49', '\x12', '\x0a',
+ '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00',
+ '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00',
+ '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11',
+ '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00',
+ '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00',
+ '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'};
std::string special_output{
'\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
@@ -568,15 +544,14 @@ TEST_F(bam_format, too_long_cigar_string_read)
seqan3::sam_file_input fin{stream, this->ref_ids, this->ref_sequences, seqan3::format_bam{}};
- EXPECT_RANGE_EQ(std::get<0>((*fin.begin()).alignment()), std::get<0>(this->alignments[0]));
- EXPECT_RANGE_EQ(std::get<1>((*fin.begin()).alignment()), std::get<1>(this->alignments[0]));
+ EXPECT_RANGE_EQ((*fin.begin()).cigar_sequence(), this->cigars[0]);
EXPECT_EQ((*fin.begin()).tags().size(), 0u); // redundant CG tag is removed
}
{ // error: sam_tag_dictionary is not read
std::istringstream stream{sam_file_with_too_long_cigar_string};
- seqan3::sam_file_input fin{stream, seqan3::format_bam{}, seqan3::fields{}};
+ seqan3::sam_file_input fin{stream, seqan3::format_bam{}, seqan3::fields{}};
ASSERT_THROW(fin.begin(), seqan3::format_error);
}
@@ -585,7 +560,7 @@ TEST_F(bam_format, too_long_cigar_string_read)
seqan3::sam_file_input fin{stream,
seqan3::format_bam{},
- seqan3::fields{}};
+ seqan3::fields{}};
ASSERT_THROW(fin.begin(), seqan3::format_error);
}
@@ -611,25 +586,19 @@ TEST_F(bam_format, too_long_cigar_string_read)
TEST_F(bam_format, too_long_cigar_string_write)
{
- // create an alignment resulting more than 65535 cigar elements
- // -------------------------------------------------------------------------
auto read = seqan3::views::repeat_n('T'_dna5, 70'000);
auto ref = seqan3::views::repeat_n('A'_dna5, 2 * read.size() - 1);
- auto gapped_ref = seqan3::gap_decorator{ref};
-
- // a gap_decorator on a repeat_n view also works but is slow when inserting gaps.
- std::vector> gapped_read;
- gapped_read.reserve(2 * read.size());
- // create gap of length one every second character => T-T-T-T-T-T...
- for (auto chr : read)
- gapped_read.push_back(chr), gapped_read.push_back(seqan3::gap{});
- gapped_read.pop_back(); // remove last seqan3::gap
-
- auto alignment = std::tie(gapped_ref, gapped_read);
+ // create a cigar with more than 65535 cigar elements
+ std::vector too_long_cigar{};
+ for (size_t i = 0; i < 69'999; ++i)
+ {
+ too_long_cigar.push_back({1, 'M'_cigar_operation});
+ too_long_cigar.push_back({1, 'D'_cigar_operation});
+ }
+ too_long_cigar.push_back({1, 'M'_cigar_operation});
// Expected output. ATTENTION this could not be validated by samtools as it does not support too long cigar strings
- // -------------------------------------------------------------------------
std::string expected =
std::string /*the beginning*/
{'\x42', '\x41', '\x4D', '\x01', '\x20', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
@@ -667,10 +636,10 @@ TEST_F(bam_format, too_long_cigar_string_write)
seqan3::field::seq,
seqan3::field::ref_id,
seqan3::field::ref_offset,
- seqan3::field::alignment,
+ seqan3::field::cigar,
seqan3::field::mapq>{}};
- fout.emplace_back(&header, std::string{"long_read"}, read, 0, 0, alignment, 255);
+ fout.emplace_back(&header, std::string{"long_read"}, read, 0, 0, too_long_cigar, 255);
}
os.flush();
@@ -696,9 +665,7 @@ TEST_F(bam_format, issue2417)
std::istringstream stream{input};
- seqan3::sam_file_input fin{stream,
- seqan3::format_bam{},
- seqan3::fields{}};
+ seqan3::sam_file_input fin{stream, seqan3::format_bam{}, seqan3::fields{}};
std::vector> const empty_sequence{};
@@ -707,12 +674,11 @@ TEST_F(bam_format, issue2417)
// In 2417, the sequence was not consumed. Thus, wrong bytes were read for the following records.
// With the chosen `input` this also means that there will be more than 1 record in the alignment file.
// Hence, we need the for loop even though there is only 1 record.
- for (auto && [id, alignment] : fin)
+ for (auto && [id, cigar] : fin)
{
++num_records;
EXPECT_RANGE_EQ(id, std::string{"read1"});
- EXPECT_RANGE_EQ(std::get<0>(alignment), empty_sequence);
- EXPECT_RANGE_EQ(std::get<1>(alignment), empty_sequence);
+ EXPECT_TRUE(cigar.empty());
}
EXPECT_EQ(num_records, 1u);
diff --git a/test/unit/io/sam_file/format_sam_test.cpp b/test/unit/io/sam_file/format_sam_test.cpp
index 36d43f4e8e..27ad0d596c 100644
--- a/test/unit/io/sam_file/format_sam_test.cpp
+++ b/test/unit/io/sam_file/format_sam_test.cpp
@@ -7,8 +7,6 @@
#include
-#include
-
#include "sam_file_format_test_template.hpp"
template <>
@@ -83,14 +81,6 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
// formatted output
// -----------------------------------------------------------------------------------------------------------------
- std::string simple_three_reads_output{// compared to simple_three_reads_input this has no hard clipping
- R"(@HD VN:1.6
-@SQ SN:ref LN:34
-read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 NM:i:7
-read2 42 ref 2 62 7M1D1M1S ref 10 300 AGGCTGNAG !##$&'()* xy:B:S,3,4,5
-read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
-)"};
-
std::string verbose_output{
R"(@HD VN:1.6 SO:unknown GO:none
@SQ SN:ref LN:34 AN:other_name
@@ -98,7 +88,7 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
@PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c
@CO This is a comment.
read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 CC:i:300 NM:i:-7 aa:A:c cc:i:-300 ff:f:3.1 zz:Z:str
-read2 42 ref 2 62 7M1D1M1S ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300
+read2 42 ref 2 62 1H7M1D1M1S2H ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300
read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
)"};
@@ -365,25 +355,14 @@ TEST_F(sam_format, short_cigar_string_with_softclipping)
{
using seqan3::operator""_dna5;
- // The member function transfer_soft_clipping_to needs to work on 2 element cigar strings
- {
- std::istringstream istream("id 16 ref 0 255 10M5S * 0 0 AGAGGGGGATAACCA *\n");
- seqan3::sam_file_input fin{istream,
- ref_ids,
- ref_sequences,
- seqan3::format_sam{},
- seqan3::fields{}};
- EXPECT_RANGE_EQ(std::get<1>(std::get<0>(*fin.begin())), "AGAGGGGGAT"_dna5);
- }
-
{
std::istringstream istream("id 16 ref 0 255 5S10M * 0 0 AGAGGGGGATAACCA *\n");
seqan3::sam_file_input fin{istream,
ref_ids,
ref_sequences,
seqan3::format_sam{},
- seqan3::fields{}};
- EXPECT_RANGE_EQ(std::get<1>(std::get<0>(*fin.begin())), "GGGATAACCA"_dna5);
+ seqan3::fields{}};
+ EXPECT_EQ((*fin.begin()).sequence_position(), 5);
}
}
diff --git a/test/unit/io/sam_file/sam_file_format_test_template.hpp b/test/unit/io/sam_file/sam_file_format_test_template.hpp
index 80a3e6f3b3..e5c5d91c52 100644
--- a/test/unit/io/sam_file/sam_file_format_test_template.hpp
+++ b/test/unit/io/sam_file/sam_file_format_test_template.hpp
@@ -96,29 +96,6 @@ struct sam_file_data : public ::testing::Test
std::vector ref_offsets{0, 1, 2};
- std::vector>, std::vector>>>
- alignments{
- {ref_seq_gapped1, std::vector>{'C'_dna5, seqan3::gap{}, 'G'_dna5, 'T'_dna5}},
- {ref_seq_gapped2,
- std::vector>{'A'_dna5,
- 'G'_dna5,
- 'G'_dna5,
- 'C'_dna5,
- 'T'_dna5,
- 'G'_dna5,
- 'N'_dna5,
- seqan3::gap{},
- 'A'_dna5}},
- {ref_seq_gapped3,
- std::vector>{'G'_dna5,
- seqan3::gap{},
- 'A'_dna5,
- 'G'_dna5,
- 'T'_dna5,
- 'A'_dna5,
- seqan3::gap{},
- 'T'_dna5}}};
-
std::vector flags{seqan3::sam_flag{41u}, seqan3::sam_flag{42u}, seqan3::sam_flag{43u}};
std::vector mapqs{61u, 62u, 63u};
@@ -233,10 +210,9 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
EXPECT_EQ(rec.id(), this->ids[i]);
EXPECT_EQ(rec.base_qualities(), this->quals[i]);
EXPECT_EQ(rec.sequence_position(), this->offsets[i]);
+ EXPECT_RANGE_EQ(rec.cigar_sequence(), this->cigars[i]);
EXPECT_EQ(rec.reference_id(), 0);
EXPECT_EQ(*rec.reference_position(), this->ref_offsets[i]);
- EXPECT_RANGE_EQ(std::get<0>(rec.alignment()), std::get<0>(this->alignments[i]));
- EXPECT_RANGE_EQ(std::get<1>(rec.alignment()), std::get<1>(this->alignments[i]));
EXPECT_EQ(rec.flag(), this->flags[i]);
EXPECT_EQ(rec.mapping_quality(), this->mapqs[i]);
EXPECT_EQ(rec.mate_reference_id(), std::get<0>(this->mates[i]));
@@ -256,10 +232,9 @@ TYPED_TEST_P(sam_file_read, read_in_all_but_empty_data)
EXPECT_TRUE((*fin.begin()).id().empty());
EXPECT_TRUE((*fin.begin()).base_qualities().empty());
EXPECT_EQ((*fin.begin()).sequence_position(), 0);
+ EXPECT_TRUE((*fin.begin()).cigar_sequence().empty());
EXPECT_TRUE(!(*fin.begin()).reference_id().has_value());
EXPECT_TRUE(!(*fin.begin()).reference_position().has_value());
- EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment())));
- EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment())));
EXPECT_EQ((*fin.begin()).flag(), seqan3::sam_flag{0u});
EXPECT_EQ((*fin.begin()).mapping_quality(), 0u);
EXPECT_TRUE(!(*fin.begin()).mate_reference_id().has_value());
@@ -278,62 +253,6 @@ TYPED_TEST_P(sam_file_read, read_in_almost_nothing)
EXPECT_EQ(mapq, this->mapqs[i++]);
}
-TYPED_TEST_P(sam_file_read, read_in_alignment_only_with_ref)
-{
- {
- typename TestFixture::stream_type istream{this->simple_three_reads_input};
- seqan3::sam_file_input fin{istream,
- this->ref_ids,
- this->ref_sequences,
- TypeParam{},
- seqan3::fields{}};
-
- size_t i{0};
- for (auto & [alignment] : fin)
- {
- EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(this->alignments[i]));
- EXPECT_RANGE_EQ(std::get<1>(alignment), std::get<1>(this->alignments[i]));
- ++i;
- }
- }
-
- { // empty cigar
- typename TestFixture::stream_type istream{this->empty_cigar};
- seqan3::sam_file_input fin{istream,
- this->ref_ids,
- this->ref_sequences,
- TypeParam{},
- seqan3::fields{}};
-
- EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment())));
- EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment())));
- }
-}
-
-TYPED_TEST_P(sam_file_read, read_in_alignment_only_without_ref)
-{
- {
- typename TestFixture::stream_type istream{this->simple_three_reads_input};
- seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields{}};
-
- size_t i{0};
- for (auto & [alignment] : fin)
- {
- EXPECT_RANGE_EQ(std::get<1>(alignment), std::get<1>(this->alignments[i++]));
- auto & ref_aln = std::get<0>(alignment);
- EXPECT_THROW((ref_aln[0]), std::logic_error); // access on a dummy seq is not allowed
- }
- }
-
- { // empty cigar
- typename TestFixture::stream_type istream{this->empty_cigar};
- seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields{}};
-
- EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment())));
- EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment())));
- }
-}
-
TYPED_TEST_P(sam_file_read, read_mate_but_not_ref_id_with_ref)
{
{ /*with reference information*/
@@ -411,17 +330,13 @@ TYPED_TEST_P(sam_file_read, issue2423)
// sam_file_write
// ----------------------------------------------------------------------------
-// Note that these differ from the sam_file_output default fields:
-// 1. They don't contain field::bit_score and field::evalue since these belong to the BLAST format.
-// 2. field::alignment and field::cigar are redundant. Since field::alignment is the more complex one it is chosen here.
-// The behaviour if both are given is tested in a separate test.
using sam_fields = seqan3::fieldsostream, TypeParam{}, sam_fields{}};
- using default_align_t = std::pair>, std::span>>;
using default_mate_t = std::tuple, int32_t>;
fout.emplace_back(&(this->header),
@@ -466,7 +380,7 @@ TYPED_TEST_P(sam_file_write, write_empty_members)
std::string_view{},
-1,
0,
- default_align_t{},
+ std::vector{},
0,
default_mate_t{},
std::string_view{},
@@ -491,7 +405,7 @@ TYPED_TEST_P(sam_file_write, default_options_all_members_specified)
0 /*ref_id*/,
this->ref_offsets[i],
this->mapqs[i],
- this->alignments[i],
+ this->cigars[i],
this->offsets[i],
this->mates[i],
this->seqs[i],
@@ -501,7 +415,7 @@ TYPED_TEST_P(sam_file_write, default_options_all_members_specified)
}
this->ostream.flush();
- EXPECT_EQ(this->ostream.str(), this->simple_three_reads_output);
+ EXPECT_EQ(this->ostream.str(), this->simple_three_reads_input);
}
TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types)
@@ -517,7 +431,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types)
/*----------------------->*/ this->ref_id,
this->ref_offsets[0],
this->mapqs[0],
- this->alignments[0],
+ this->cigars[0],
this->offsets[0],
this->mates[0],
this->seqs[0],
@@ -531,7 +445,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types)
/*----------------------->*/ std::string_view{this->ref_id},
this->ref_offsets[1],
this->mapqs[1],
- this->alignments[1],
+ this->cigars[1],
this->offsets[1],
this->mates[1],
this->seqs[1],
@@ -545,7 +459,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types)
/*----------------------->*/ this->ref_id | std::views::take(20),
this->ref_offsets[2],
this->mapqs[2],
- this->alignments[2],
+ this->cigars[2],
this->offsets[2],
this->mates[2],
this->seqs[2],
@@ -555,7 +469,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types)
this->ostream.flush();
- EXPECT_EQ(this->ostream.str(), this->simple_three_reads_output);
+ EXPECT_EQ(this->ostream.str(), this->simple_three_reads_input);
}
TYPED_TEST_P(sam_file_write, with_header)
@@ -580,7 +494,7 @@ TYPED_TEST_P(sam_file_write, with_header)
0 /*ref_id*/,
this->ref_offsets[i],
this->mapqs[i],
- this->alignments[i],
+ this->cigars[i],
this->offsets[i],
this->mates[i],
this->seqs[i],
@@ -597,7 +511,7 @@ TYPED_TEST_P(sam_file_write, with_header)
TYPED_TEST_P(sam_file_write, cigar_vector)
{
{
- seqan3::sam_file_output fout{this->ostream, TypeParam{}}; // default fields contain CIGAR and alignment
+ seqan3::sam_file_output fout{this->ostream, TypeParam{}};
for (size_t i = 0ul; i < 3ul; ++i)
{
ASSERT_NO_THROW(fout.emplace_back(this->seqs[i],
@@ -605,7 +519,6 @@ TYPED_TEST_P(sam_file_write, cigar_vector)
this->offsets[i],
0 /*ref_id*/,
this->ref_offsets[i],
- this->alignments[i],
this->cigars[i],
this->mapqs[i],
this->quals[i],
@@ -633,7 +546,6 @@ TYPED_TEST_P(sam_file_write, cigar_vector)
seqan3::field::ref_offset,
seqan3::field::mapq,
seqan3::field::cigar,
- // cigar instead of alignment
seqan3::field::offset,
seqan3::field::mate,
seqan3::field::seq,
@@ -679,7 +591,7 @@ TYPED_TEST_P(sam_file_write, special_cases)
rid,
this->ref_offsets[0],
this->mapqs[0],
- this->alignments[0],
+ this->cigars[0],
this->offsets[0],
mate,
this->seqs[0],
@@ -705,7 +617,7 @@ TYPED_TEST_P(sam_file_write, special_cases)
std::string(""),
this->ref_offsets[0],
this->mapqs[0],
- this->alignments[0],
+ this->cigars[0],
this->offsets[0],
mate_str,
this->seqs[0],
@@ -729,7 +641,7 @@ TYPED_TEST_P(sam_file_write, format_errors)
std::string("ref_id_that_does_not_exist"),
this->ref_offsets[0],
this->mapqs[0],
- this->alignments[0],
+ this->cigars[0],
this->offsets[0],
this->mates[0],
this->seqs[0],
@@ -744,7 +656,7 @@ TYPED_TEST_P(sam_file_write, format_errors)
this->ref_id,
-3,
this->mapqs[0],
- this->alignments[0],
+ this->cigars[0],
this->offsets[0],
this->mates[0],
this->seqs[0],
@@ -759,8 +671,6 @@ REGISTER_TYPED_TEST_SUITE_P(sam_file_read,
read_in_all_data,
read_in_all_but_empty_data,
read_in_almost_nothing,
- read_in_alignment_only_with_ref,
- read_in_alignment_only_without_ref,
read_mate_but_not_ref_id_with_ref,
read_mate_but_not_ref_id_without_ref,
cigar_vector,
diff --git a/test/unit/io/sam_file/sam_file_input_test.cpp b/test/unit/io/sam_file/sam_file_input_test.cpp
index bf369b71dc..d1146e873d 100644
--- a/test/unit/io/sam_file/sam_file_input_test.cpp
+++ b/test/unit/io/sam_file/sam_file_input_test.cpp
@@ -144,7 +144,6 @@ TEST_F(sam_file_input_f, default_template_args_and_deduction_guides)
seqan3::field::offset,
seqan3::field::ref_id,
seqan3::field::ref_offset,
- seqan3::field::alignment,
seqan3::field::cigar,
seqan3::field::mapq,
seqan3::field::qual,
@@ -255,7 +254,7 @@ TEST_F(sam_file_input_f, record_reading)
EXPECT_RANGE_EQ(rec.id(), id_comp[counter]);
EXPECT_RANGE_EQ(rec.base_qualities(), qual_comp[counter]);
- counter++;
+ ++counter;
}
EXPECT_EQ(counter, 3u);
@@ -274,7 +273,7 @@ TEST_F(sam_file_input_f, record_reading_custom_fields)
EXPECT_RANGE_EQ(seq, seq_comp[counter]);
EXPECT_RANGE_EQ(id, id_comp[counter]);
- counter++;
+ ++counter;
}
EXPECT_EQ(counter, 3u);
@@ -297,7 +296,7 @@ TEST_F(sam_file_input_f, file_view)
EXPECT_RANGE_EQ(rec.id(), id_comp[counter]);
EXPECT_RANGE_EQ(rec.base_qualities(), qual_comp[counter]);
- counter++;
+ ++counter;
}
EXPECT_EQ(counter, 3u);
@@ -317,7 +316,7 @@ void decompression_impl(fixture_t & fix, input_file_t & fin)
EXPECT_RANGE_EQ(rec.id(), fix.id_comp[counter]);
EXPECT_RANGE_EQ(rec.base_qualities(), fix.qual_comp[counter]);
- counter++;
+ ++counter;
}
EXPECT_EQ(counter, 3u);
@@ -491,123 +490,14 @@ TEST_F(sam_file_input_f, read_empty_bz2_file)
#endif
// ----------------------------------------------------------------------------
-// SAM format specificities
+// BAM format specificities
// ----------------------------------------------------------------------------
-struct sam_file_input_sam_format_f : public sam_file_input_f
+struct sam_file_input_bam_format_f : public sam_file_input_f
{
std::vector const ref_seqs = {"ACTGATCGAGAGGATCTAGAGGAGATCGTAGGAC"_dna4};
std::vector const ref_ids = {"ref"};
- std::vector> ref_seq_gapped1 = {'A'_dna4, 'C'_dna4, 'T'_dna4, 'G'_dna4};
- std::vector