Skip to content

Commit

Permalink
[FIX] PIBF not working
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Aug 11, 2021
1 parent 02312c4 commit 161454a
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 4 deletions.
3 changes: 2 additions & 1 deletion include/raptor/search/run_program_multiple.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ void run_program_multiple(search_arguments const & arguments)
auto end = std::chrono::high_resolution_clock::now();
reads_io_time += std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();

cereal_handle.wait();

std::vector<seqan3::counting_vector<uint16_t>> counts(records.size(),
seqan3::counting_vector<uint16_t>(ibf.bin_count(), 0));

Expand All @@ -94,7 +96,6 @@ void run_program_multiple(search_arguments const & arguments)
}
};

cereal_handle.wait();
do_parallel(count_task, records.size(), arguments.threads, compute_time);

for (size_t const part : std::views::iota(1u, static_cast<unsigned int>(arguments.parts - 1)))
Expand Down
20 changes: 17 additions & 3 deletions src/argument_parsing/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ void init_search_parser(seqan3::argument_parser & parser, search_arguments & arg
"index",
arguments.is_socks ? "Provide a valid path to an IBF." :
"Provide a valid path to an IBF. Parts: Without suffix _0",
seqan3::option_spec::required,
seqan3::input_file_validator{});
seqan3::option_spec::required);
parser.add_option(arguments.query_file,
'\0',
"query",
Expand Down Expand Up @@ -81,6 +80,19 @@ void run_search(seqan3::argument_parser & parser, bool const is_socks)

arguments.treshold_was_set = parser.is_option_set("threshold");

if (arguments.parts == 1)
{
seqan3::input_file_validator{}(arguments.ibf_file);
}
else
{
seqan3::input_file_validator validator{};
for (size_t part{0}; part < arguments.parts; ++part)
{
validator(arguments.ibf_file.string() + std::string{"_"} + std::to_string(part));
}
}

// ==========================================
// Process --pattern.
// ==========================================
Expand All @@ -100,7 +112,9 @@ void run_search(seqan3::argument_parser & parser, bool const is_socks)
// Read window and kmer size, and the bin paths.
// ==========================================
{
std::ifstream is{arguments.ibf_file, std::ios::binary};
std::ifstream is{arguments.parts == 1 ? arguments.ibf_file.string() :
arguments.ibf_file.string() + std::string{"_0"},
std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
iarchive(arguments.kmer_size);
iarchive(arguments.window_size);
Expand Down
1 change: 1 addition & 0 deletions test/cli/cli_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,4 @@ struct raptor : public cli_test

// Value-parameterised fixtures layered on top of `raptor`. Each one only fixes the tuple type that
// GetParam() returns; the meaning of the tuple elements is defined by the tests that use the fixture.

// Parameter tuple: (size_t, size_t, bool).
struct raptor_build : public raptor, public testing::WithParamInterface<std::tuple<size_t, size_t, bool>> {};
// Parameter tuple: (size_t, size_t, size_t).
struct raptor_search : public raptor, public testing::WithParamInterface<std::tuple<size_t, size_t, size_t>> {};
// Parameter tuple: (size_t, size_t, bool, size_t). The `pipeline` test unpacks it as
// (number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors).
struct raptor_parts : public raptor, public testing::WithParamInterface<std::tuple<size_t, size_t, bool, size_t>> {};
89 changes: 89 additions & 0 deletions test/cli/raptor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,92 @@ INSTANTIATE_TEST_SUITE_P(search_suite,
std::to_string(std::get<2>(info.param)) + "_error";
return name;
});

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////// raptor parts tests //////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// End-to-end test of the partitioned index: runs `raptor build` with `--parts 4`, then `raptor search`
// with `--parts 4` on the resulting index, and compares `search.out` with the expected output that is
// assembled from the generated header and the stored search result for the same parameters.
TEST_P(raptor_parts, pipeline)
{
    auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors] = GetParam();
    // Two build threads are only requested when at least 32 repeated bins are used.
    bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32;

    bool const needs_threshold_correction = (window_size == 23) && (number_of_errors == 0);
    if (needs_threshold_correction)
        GTEST_SKIP() << "Needs dynamic threshold correction";

    // Write the list of bin paths for `raptor build` and, in the same pass, collect the header lines
    // ("#<id>\t<path>") that are expected at the top of the search output.
    std::stringstream header{};
    {
        std::string const expanded_bins = repeat_bins(number_of_repeated_bins);
        std::ofstream file{"raptor_cli_test.txt"};

        // Turns one chunk of the split range into a view onto the characters of `expanded_bins`.
        auto to_string_view = [] (auto && rng)
        {
            return std::string_view(&*rng.begin(), std::ranges::distance(rng));
        };
        auto bin_paths = expanded_bins | std::views::split(' ') | std::views::transform(to_string_view);

        size_t usr_bin_id{0};
        for (auto && bin_path : bin_paths)
        {
            header << '#' << usr_bin_id << '\t' << bin_path << '\n';
            file << bin_path << '\n';
            ++usr_bin_id;
        }
        header << "#QUERY_NAME\tUSER_BINS\n";
        file << '\n';
    } // `file` is closed here, before the build step reads it.

    cli_test_result const build_result = execute_app("raptor", "build",
                                                     "--kmer 19",
                                                     "--window ", std::to_string(window_size),
                                                     "--size 64k",
                                                     "--threads ", run_parallel ? "2" : "1",
                                                     "--output index.ibf",
                                                     "--parts 4",
                                                     "raptor_cli_test.txt");
    EXPECT_EQ(build_result.out, std::string{});
    EXPECT_EQ(build_result.err, std::string{});
    ASSERT_EQ(build_result.exit_code, 0);

    cli_test_result const search_result = execute_app("raptor", "search",
                                                      "--output search.out",
                                                      "--error ", std::to_string(number_of_errors),
                                                      "--index ", "index.ibf",
                                                      "--parts 4",
                                                      "--query ", data("query.fq"));
    EXPECT_EQ(search_result.out, std::string{});
    EXPECT_EQ(search_result.err, std::string{});
    ASSERT_EQ(search_result.exit_code, 0);

    // Expected output = generated header + every line of the stored result starting at the first
    // line that begins with "query1".
    std::string const expected = [&] ()
    {
        std::string content{header.str()};
        std::string line{};
        std::ifstream reference{search_result_path(number_of_repeated_bins,
                                                   window_size,
                                                   number_of_errors)};

        auto append_current_line = [&content, &line] ()
        {
            content += line;
            content += '\n';
        };

        // Advance to the first "query1" line; lines before it are not copied.
        do
        {
            if (!std::getline(reference, line))
                break;
        } while (line.compare(0, 6, "query1") != 0);
        append_current_line();

        // Copy the remainder of the stored result verbatim.
        while (std::getline(reference, line))
            append_current_line();

        return content;
    }();

    std::string const actual = string_from_file("search.out");

    EXPECT_EQ(expected, actual);
}

// Instantiates the raptor_parts tests for every combination of
//   repeated bins {0} x window size {19, 23} x parallel {true, false} x errors {0, 1}.
// Generated test names have the form "<bins>_bins_<window>_window_<parallel|serial>_<errors>_error",
// where <bins> is max(1, repeated_bins * 4).
INSTANTIATE_TEST_SUITE_P(parts_suite,
                         raptor_parts,
                         testing::Combine(testing::Values(0),
                                          testing::Values(19, 23),
                                          testing::Values(true, false),
                                          testing::Values(0, 1)),
                         [] (testing::TestParamInfo<raptor_parts::ParamType> const & info)
                         {
                             // The trailing '_' after the mode keeps it separated from the error count;
                             // without it the name reads e.g. "..._serial0_error".
                             std::string name = std::to_string(std::max<int>(1, std::get<0>(info.param) * 4)) + "_bins_" +
                                                std::to_string(std::get<1>(info.param)) + "_window_" +
                                                (std::get<2>(info.param) ? "parallel_" : "serial_") +
                                                std::to_string(std::get<3>(info.param)) + "_error";
                             return name;
                         });

0 comments on commit 161454a

Please sign in to comment.