From fcc1ce314653def139e124a7b9c89849e3048861 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 13 Aug 2024 14:16:14 +0200 Subject: [PATCH] [FIX] iterator may be invalidated `window_hashes.emplace_back` may invalidate the `min`, an iterator pointing to some location in `window_hashes`. Also fixes an error in the comparison: `new_hash < min->second` compared the new hash value against the current minimiser position instead of the current minimiser hash. --- .../threshold/forward_strand_minimiser.hpp | 41 +++++++++++-------- test/unit/api/threshold.cpp | 8 ++-- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/include/raptor/threshold/forward_strand_minimiser.hpp b/include/raptor/threshold/forward_strand_minimiser.hpp index ea6af8e3..67f2dbaf 100644 --- a/include/raptor/threshold/forward_strand_minimiser.hpp +++ b/include/raptor/threshold/forward_strand_minimiser.hpp @@ -94,38 +94,45 @@ struct forward_strand_minimiser auto kmer_view = text | seqan3::views::kmer_hash(shape) | std::views::transform(apply_xor); forward_hashes.assign(kmer_view.begin(), kmer_view.end()); + struct kmer + { + uint64_t hash{}; + uint64_t position{}; + + constexpr auto operator<=>(kmer const & other) const = default; + }; + // Stores hash and begin for all k-mers in the window - std::deque> window_hashes; + std::deque window_hashes; // Initialisation. We need to compute all hashes for the first window. for (uint64_t i = 0; i < kmers_per_window; ++i) - window_hashes.emplace_back(forward_hashes[i], i); + window_hashes.emplace_back(kmer{.hash = forward_hashes[i], .position = i}); // The minimum hash is the minimiser. Store the begin position. - auto min = std::min_element(std::begin(window_hashes), std::end(window_hashes)); - minimiser_begin.push_back(min->second); + auto min = std::ranges::min(window_hashes); + minimiser_begin.push_back(min.position); // For the following windows, we remove the first window k-mer (is now not in window) and add the new k-mer // that results from the window shifting. for (uint64_t i = kmers_per_window; i < max_number_of_minimiser; ++i) { - // Already store the new hash without removing the first one. uint64_t const new_hash{forward_hashes[i + kmers_per_window - 1]}; // Already did kmers_per_window - 1 many - window_hashes.emplace_back(new_hash, i); - if (new_hash < min->second) // New hash is the minimum. - { - min = std::prev(std::end(window_hashes)); - minimiser_begin.push_back(min->second); - } - else if (min == std::begin(window_hashes)) // Minimum is the yet-to-be-removed begin of the window. + // There are two conditions when we need to recompute the minimum: + bool const minimiser_leaves_window = window_hashes.front() == min; + bool const new_hash_is_min = new_hash < min.hash; + + // Rolling hash / sliding window + window_hashes.pop_front(); + window_hashes.emplace_back(kmer{.hash = new_hash, .position = i}); + + // Update the minimum + if (new_hash_is_min || minimiser_leaves_window) { - // The first hash will be removed, the last one is caught by the previous if. - min = std::min_element(++std::begin(window_hashes), std::prev(std::end(window_hashes))); - minimiser_begin.push_back(min->second); + min = new_hash_is_min ? window_hashes.back() : std::ranges::min(window_hashes); + minimiser_begin.push_back(min.position); } - - window_hashes.pop_front(); // Remove the first k-mer. } return; } diff --git a/test/unit/api/threshold.cpp b/test/unit/api/threshold.cpp index 7f4aef0f..67a0ba36 100644 --- a/test/unit/api/threshold.cpp +++ b/test/unit/api/threshold.cpp @@ -116,11 +116,11 @@ TEST(minimiser, logspace_substract) raptor::threshold::threshold const threshold{parameters}; // results for pseudorandom generators are implementation-defined #ifdef _LIBCPP_VERSION - std::vector const expected{1u, 1u, 1u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 8u, 9u, - 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 21u, 22u}; + std::vector const expected{1u, 1u, 1u, 1u, 2u, 4u, 5u, 6u, 7u, 8u, 9u, 9u, 10u, + 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 22u, 23u}; #else - std::vector const expected{1u, 1u, 1u, 1u, 2u, 3u, 4u, 5u, 6u, 6u, 7u, 8u, 9u, - 10u, 11u, 12u, 13u, 13u, 14u, 15u, 16u, 17u, 18u, 20u, 21u}; + std::vector const expected{1u, 1u, 1u, 1u, 2u, 4u, 5u, 6u, 7u, 7u, 8u, 9u, 10u, + 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 22u, 23u}; #endif ASSERT_EQ(expected.size(), 37u - 12u); for (size_t i = 12u; i < 37u; ++i)