Skip to content

Commit

Permalink
[FIX] iterator may be invalidated
Browse files Browse the repository at this point in the history
`window_hashes.emplace_back` may invalidate the `min`, an iterator
pointing to some location in `window_hashes`.

Also fixes an error in the comparison:

`new_hash < min->second` compared the new hash value against the current
minimiser position instead of the current minimiser hash.
  • Loading branch information
eseiler committed Aug 13, 2024
1 parent f3587c7 commit fcc1ce3
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 21 deletions.
41 changes: 24 additions & 17 deletions include/raptor/threshold/forward_strand_minimiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,38 +94,45 @@ struct forward_strand_minimiser
auto kmer_view = text | seqan3::views::kmer_hash(shape) | std::views::transform(apply_xor);
forward_hashes.assign(kmer_view.begin(), kmer_view.end());

struct kmer
{
uint64_t hash{};
uint64_t position{};

constexpr auto operator<=>(kmer const & other) const = default;
};

// Stores hash and begin for all k-mers in the window
std::deque<std::pair<uint64_t, uint64_t>> window_hashes;
std::deque<kmer> window_hashes;

// Initialisation. We need to compute all hashes for the first window.
for (uint64_t i = 0; i < kmers_per_window; ++i)
window_hashes.emplace_back(forward_hashes[i], i);
window_hashes.emplace_back(kmer{.hash = forward_hashes[i], .position = i});

// The minimum hash is the minimiser. Store the begin position.
auto min = std::min_element(std::begin(window_hashes), std::end(window_hashes));
minimiser_begin.push_back(min->second);
auto min = std::ranges::min(window_hashes);
minimiser_begin.push_back(min.position);

// For the following windows, we remove the first window k-mer (is now not in window) and add the new k-mer
// that results from the window shifting.
for (uint64_t i = kmers_per_window; i < max_number_of_minimiser; ++i)
{
// Already store the new hash without removing the first one.
uint64_t const new_hash{forward_hashes[i + kmers_per_window - 1]}; // Already did kmers_per_window - 1 many
window_hashes.emplace_back(new_hash, i);

if (new_hash < min->second) // New hash is the minimum.
{
min = std::prev(std::end(window_hashes));
minimiser_begin.push_back(min->second);
}
else if (min == std::begin(window_hashes)) // Minimum is the yet-to-be-removed begin of the window.
// There are two conditions when we need to recompute the minimum:
bool const minimiser_leaves_window = window_hashes.front() == min;
bool const new_hash_is_min = new_hash < min.hash;

// Rolling hash / sliding window
window_hashes.pop_front();
window_hashes.emplace_back(kmer{.hash = new_hash, .position = i});

// Update the minimum
if (new_hash_is_min || minimiser_leaves_window)
{
// The first hash will be removed, the last one is caught by the previous if.
min = std::min_element(++std::begin(window_hashes), std::prev(std::end(window_hashes)));
minimiser_begin.push_back(min->second);
min = new_hash_is_min ? window_hashes.back() : std::ranges::min(window_hashes);
minimiser_begin.push_back(min.position);
}

window_hashes.pop_front(); // Remove the first k-mer.
}
return;
}
Expand Down
8 changes: 4 additions & 4 deletions test/unit/api/threshold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ TEST(minimiser, logspace_substract)
raptor::threshold::threshold const threshold{parameters};
// results for pseudorandom generators are implementation-defined
#ifdef _LIBCPP_VERSION
std::vector<size_t> const expected{1u, 1u, 1u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 8u, 9u,
10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 21u, 22u};
std::vector<size_t> const expected{1u, 1u, 1u, 1u, 2u, 4u, 5u, 6u, 7u, 8u, 9u, 9u, 10u,
11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 22u, 23u};
#else
std::vector<size_t> const expected{1u, 1u, 1u, 1u, 2u, 3u, 4u, 5u, 6u, 6u, 7u, 8u, 9u,
10u, 11u, 12u, 13u, 13u, 14u, 15u, 16u, 17u, 18u, 20u, 21u};
std::vector<size_t> const expected{1u, 1u, 1u, 1u, 2u, 4u, 5u, 6u, 7u, 7u, 8u, 9u, 10u,
11u, 12u, 13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 22u, 23u};
#endif
ASSERT_EQ(expected.size(), 37u - 12u);
for (size_t i = 12u; i < 37u; ++i)
Expand Down

0 comments on commit fcc1ce3

Please sign in to comment.