From 11564cb3bcea39666d6d3979080bc5d8fdbe1d7e Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 16 Jul 2024 17:32:10 +0200 Subject: [PATCH 1/3] Fixes #3266: Incorrect begin/end of alignment for banded alignment In case the sequences are sliced by cutting off a prefix of the original seuence due to the band configuration, the relative positions of the sliced sequences was reported inside of the alignment result. However, this slicing happens only internally and thus the positions reported should correspond to the ones of the original sequences. We fix this by tracking the offset of the prefix that was cut off and add this to the relative positions during result creation to get the absolute positions. --- .../pairwise/alignment_algorithm.hpp | 17 ++++++--- .../policy/alignment_matrix_policy.hpp | 37 ++++++++++++++++--- .../pairwise/global_affine_banded_test.cpp | 36 ++++++++++++++++++ 3 files changed, 80 insertions(+), 10 deletions(-) diff --git a/include/seqan3/alignment/pairwise/alignment_algorithm.hpp b/include/seqan3/alignment/pairwise/alignment_algorithm.hpp index c5c5718ddf..c434e1c07e 100644 --- a/include/seqan3/alignment/pairwise/alignment_algorithm.hpp +++ b/include/seqan3/alignment/pairwise/alignment_algorithm.hpp @@ -618,8 +618,11 @@ class alignment_algorithm : res.end_positions = alignment_coordinate_t{column_index_type{this->alignment_state.optimum.column_index}, row_index_type{this->alignment_state.optimum.row_index}}; // At some point this needs to be refactored so that it is not necessary to adapt the coordinate. - if constexpr (traits_t::is_banded) + if constexpr (traits_t::is_banded) { res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index; + res.end_positions.first = this->to_original_sequence1_position(res.end_positions.first); + res.end_positions.second = this->to_original_sequence2_position(res.end_positions.second); + } } if constexpr (traits_t::compute_begin_positions) @@ -631,8 +634,10 @@ class alignment_algorithm : detail::row_index_type{this->alignment_state.optimum.row_index}, detail::column_index_type{this->alignment_state.optimum.column_index}}; auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate)); - res.begin_positions.first = trace_res.first_sequence_slice_positions.first; - res.begin_positions.second = trace_res.second_sequence_slice_positions.first; + res.begin_positions.first = + this->to_original_sequence1_position(trace_res.first_sequence_slice_positions.first); + res.begin_positions.second = + this->to_original_sequence2_position(trace_res.second_sequence_slice_positions.first); if constexpr (traits_t::compute_sequence_alignment) res.alignment = std::move(trace_res.alignment); @@ -697,8 +702,10 @@ class alignment_algorithm : if constexpr (traits_t::compute_end_positions) { - res.end_positions.first = this->alignment_state.optimum.column_index[simd_index]; - res.end_positions.second = this->alignment_state.optimum.row_index[simd_index]; + res.end_positions.first = + this->to_original_sequence1_position(this->alignment_state.optimum.column_index[simd_index]); + res.end_positions.second = + this->to_original_sequence2_position(this->alignment_state.optimum.row_index[simd_index]); } callback(std::move(res)); diff --git a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp index 29c85083b5..12ac2a2d3f 100644 --- a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp +++ b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp @@ -142,23 +142,23 @@ class alignment_matrix_policy template constexpr auto slice_sequences(sequence1_t & sequence1, sequence2_t & sequence2, - align_cfg::band_fixed_size const & band) const noexcept + align_cfg::band_fixed_size const & band) noexcept { size_t seq1_size = std::ranges::distance(sequence1); size_t seq2_size = std::ranges::distance(sequence2); auto trim_sequence1 = [&]() constexpr { - size_t begin_pos = std::max(band.lower_diagonal - 1, 0); + seq1_slice_offset = std::max(band.lower_diagonal - 1, 0); size_t end_pos = std::min(band.upper_diagonal + seq2_size, seq1_size); - return sequence1 | views::slice(begin_pos, end_pos); + return sequence1 | views::slice(seq1_slice_offset, end_pos); }; auto trim_sequence2 = [&]() constexpr { - size_t begin_pos = std::abs(std::min(band.upper_diagonal + 1, 0)); + seq2_slice_offset = std::abs(std::min(band.upper_diagonal + 1, 0)); size_t end_pos = std::min(seq1_size - band.lower_diagonal, seq2_size); - return sequence2 | views::slice(begin_pos, end_pos); + return sequence2 | views::slice(seq2_slice_offset, end_pos); }; return std::tuple{trim_sequence1(), trim_sequence2()}; @@ -192,10 +192,37 @@ class alignment_matrix_policy ++trace_matrix_iter; } + /*! + * \brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 1. + * + * \param position The position in the sliced alignment matrix. + * \return The corresponding position in the original sequence 1. + * + * Only changes the position if the sequence was sliced due to band configuration. + */ + constexpr size_t to_original_sequence1_position(size_t position) const noexcept { + return position + seq1_slice_offset; + } + + /*! + * \brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 2. + * + * \param position The position in the sliced alignment matrix. + * \return The corresponding position in the original sequence 2. + * + * Only changes the position if the sequence was sliced due to band configuration. + */ + constexpr size_t to_original_sequence2_position(size_t position) const noexcept { + return position + seq2_slice_offset; + } + score_matrix_t score_matrix{}; //!< The scoring matrix. trace_matrix_t trace_matrix{}; //!< The trace matrix if needed. typename score_matrix_t::iterator score_matrix_iter{}; //!< The matrix iterator over the score matrix. typename trace_matrix_t::iterator trace_matrix_iter{}; //!< The matrix iterator over the trace matrix. + + size_t seq1_slice_offset{}; //!< The offset of the first sequence slice. + size_t seq2_slice_offset{}; //!< The offset of the second sequence slice; }; } // namespace seqan3::detail diff --git a/test/unit/alignment/pairwise/global_affine_banded_test.cpp b/test/unit/alignment/pairwise/global_affine_banded_test.cpp index 0b396f1a26..669759a070 100644 --- a/test/unit/alignment/pairwise/global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/global_affine_banded_test.cpp @@ -6,6 +6,8 @@ #include +#include + #include "fixture/global_affine_banded.hpp" #include "pairwise_alignment_single_test_template.hpp" @@ -68,3 +70,37 @@ TEST_F(pairwise_global_affine_banded, invalid_band_last_cell_not_covered) fixture.config | seqan3::align_cfg::output_score{}); EXPECT_THROW(result_range.begin(), seqan3::invalid_alignment_configuration); } + +TEST(banded_alignment_issue3266_test, wrong_begin_and_end_position) +{ + using namespace seqan3; + using namespace seqan3::literals; + + const auto configGeneral = + align_cfg::scoring_scheme{nucleotide_scoring_scheme{match_score{1}, mismatch_score{-1}}} | + align_cfg::gap_cost_affine{align_cfg::open_score{0}, align_cfg::extension_score{-1}} | + align_cfg::method_global{ + align_cfg::free_end_gaps_sequence1_leading{true}, + align_cfg::free_end_gaps_sequence2_leading{true}, + align_cfg::free_end_gaps_sequence1_trailing{true}, + align_cfg::free_end_gaps_sequence2_trailing{true}}; + + const auto configBanded = configGeneral | align_cfg::band_fixed_size{align_cfg::lower_diagonal{-40}, + align_cfg::upper_diagonal{-20}}; + + //0 1 2 3 4 + //01234567890123456789012345678901234567890 + std::pair p{"CGTCTA"_dna4, "AAACCCGGGTTTAAACCCGGGTTTCGTGTACCCCCCCCCCC"_dna4}; + // CGTCTA + + auto general_results = align_pairwise(p, configGeneral); + auto general_res = *std::ranges::begin(general_results); + auto banded_results = align_pairwise(p, configBanded); + auto banded_res = *std::ranges::begin(banded_results); + + EXPECT_EQ(general_res.score(), banded_res.score()); + EXPECT_EQ(general_res.sequence1_begin_position(), banded_res.sequence1_begin_position()); + EXPECT_EQ(general_res.sequence2_begin_position(), banded_res.sequence2_begin_position()); + EXPECT_EQ(general_res.sequence1_end_position(), banded_res.sequence1_end_position()); + EXPECT_EQ(general_res.sequence2_end_position(), banded_res.sequence2_end_position()); +} From 0f0ec9408229e64beb818d1258721fa304f246ea Mon Sep 17 00:00:00 2001 From: "seqan-actions[bot]" Date: Tue, 16 Jul 2024 17:33:40 +0200 Subject: [PATCH 2/3] [MISC] automatic linting --- .../pairwise/alignment_algorithm.hpp | 3 ++- .../policy/alignment_matrix_policy.hpp | 11 ++++---- .../pairwise/global_affine_banded_test.cpp | 25 ++++++++----------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/seqan3/alignment/pairwise/alignment_algorithm.hpp b/include/seqan3/alignment/pairwise/alignment_algorithm.hpp index c434e1c07e..898be56550 100644 --- a/include/seqan3/alignment/pairwise/alignment_algorithm.hpp +++ b/include/seqan3/alignment/pairwise/alignment_algorithm.hpp @@ -618,7 +618,8 @@ class alignment_algorithm : res.end_positions = alignment_coordinate_t{column_index_type{this->alignment_state.optimum.column_index}, row_index_type{this->alignment_state.optimum.row_index}}; // At some point this needs to be refactored so that it is not necessary to adapt the coordinate. - if constexpr (traits_t::is_banded) { + if constexpr (traits_t::is_banded) + { res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index; res.end_positions.first = this->to_original_sequence1_position(res.end_positions.first); res.end_positions.second = this->to_original_sequence2_position(res.end_positions.second); diff --git a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp index 12ac2a2d3f..1fb6ac1923 100644 --- a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp +++ b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp @@ -140,9 +140,8 @@ class alignment_matrix_policy * band starts in the origin and ends in the sink. */ template - constexpr auto slice_sequences(sequence1_t & sequence1, - sequence2_t & sequence2, - align_cfg::band_fixed_size const & band) noexcept + constexpr auto + slice_sequences(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band) noexcept { size_t seq1_size = std::ranges::distance(sequence1); size_t seq2_size = std::ranges::distance(sequence2); @@ -200,7 +199,8 @@ class alignment_matrix_policy * * Only changes the position if the sequence was sliced due to band configuration. */ - constexpr size_t to_original_sequence1_position(size_t position) const noexcept { + constexpr size_t to_original_sequence1_position(size_t position) const noexcept + { return position + seq1_slice_offset; } @@ -212,7 +212,8 @@ class alignment_matrix_policy * * Only changes the position if the sequence was sliced due to band configuration. */ - constexpr size_t to_original_sequence2_position(size_t position) const noexcept { + constexpr size_t to_original_sequence2_position(size_t position) const noexcept + { return position + seq2_slice_offset; } diff --git a/test/unit/alignment/pairwise/global_affine_banded_test.cpp b/test/unit/alignment/pairwise/global_affine_banded_test.cpp index 669759a070..d3902fb6f2 100644 --- a/test/unit/alignment/pairwise/global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/global_affine_banded_test.cpp @@ -5,7 +5,6 @@ #include #include - #include #include "fixture/global_affine_banded.hpp" @@ -76,22 +75,20 @@ TEST(banded_alignment_issue3266_test, wrong_begin_and_end_position) using namespace seqan3; using namespace seqan3::literals; - const auto configGeneral = - align_cfg::scoring_scheme{nucleotide_scoring_scheme{match_score{1}, mismatch_score{-1}}} | - align_cfg::gap_cost_affine{align_cfg::open_score{0}, align_cfg::extension_score{-1}} | - align_cfg::method_global{ - align_cfg::free_end_gaps_sequence1_leading{true}, - align_cfg::free_end_gaps_sequence2_leading{true}, - align_cfg::free_end_gaps_sequence1_trailing{true}, - align_cfg::free_end_gaps_sequence2_trailing{true}}; + auto const configGeneral = align_cfg::scoring_scheme{nucleotide_scoring_scheme{match_score{1}, mismatch_score{-1}}} + | align_cfg::gap_cost_affine{align_cfg::open_score{0}, align_cfg::extension_score{-1}} + | align_cfg::method_global{align_cfg::free_end_gaps_sequence1_leading{true}, + align_cfg::free_end_gaps_sequence2_leading{true}, + align_cfg::free_end_gaps_sequence1_trailing{true}, + align_cfg::free_end_gaps_sequence2_trailing{true}}; - const auto configBanded = configGeneral | align_cfg::band_fixed_size{align_cfg::lower_diagonal{-40}, - align_cfg::upper_diagonal{-20}}; + auto const configBanded = + configGeneral | align_cfg::band_fixed_size{align_cfg::lower_diagonal{-40}, align_cfg::upper_diagonal{-20}}; - //0 1 2 3 4 - //01234567890123456789012345678901234567890 + //0 1 2 3 4 + //01234567890123456789012345678901234567890 std::pair p{"CGTCTA"_dna4, "AAACCCGGGTTTAAACCCGGGTTTCGTGTACCCCCCCCCCC"_dna4}; - // CGTCTA + // CGTCTA auto general_results = align_pairwise(p, configGeneral); auto general_res = *std::ranges::begin(general_results); From bfb1e32766736a7bde2791d4e14163bc2a5a1477 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 16 Jul 2024 18:02:22 +0200 Subject: [PATCH 3/3] Apply suggestions from code review --- .../alignment/pairwise/policy/alignment_matrix_policy.hpp | 6 ++---- test/unit/alignment/pairwise/global_affine_banded_test.cpp | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp index 1fb6ac1923..d7d50924de 100644 --- a/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp +++ b/include/seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp @@ -191,8 +191,7 @@ class alignment_matrix_policy ++trace_matrix_iter; } - /*! - * \brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 1. + /*!\brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 1. * * \param position The position in the sliced alignment matrix. * \return The corresponding position in the original sequence 1. @@ -204,8 +203,7 @@ class alignment_matrix_policy return position + seq1_slice_offset; } - /*! - * \brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 2. + /*!\brief Converts a sliced position in the alignment matrix to the corresponding position in the original sequence 2. * * \param position The position in the sliced alignment matrix. * \return The corresponding position in the original sequence 2. diff --git a/test/unit/alignment/pairwise/global_affine_banded_test.cpp b/test/unit/alignment/pairwise/global_affine_banded_test.cpp index d3902fb6f2..fd5494bc2e 100644 --- a/test/unit/alignment/pairwise/global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/global_affine_banded_test.cpp @@ -5,7 +5,6 @@ #include #include -#include #include "fixture/global_affine_banded.hpp" #include "pairwise_alignment_single_test_template.hpp"