From 6f30de0645f1afa476f5b3921ea669aa07054559 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Fri, 10 Mar 2017 18:41:23 +0100 Subject: [PATCH] remove MEMs that contain Ns Presumably these are not informative. --- src/mapper.cpp | 13 ++++++++++--- src/mapper.hpp | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/mapper.cpp b/src/mapper.cpp index f60134d67dc..337837560e9 100644 --- a/src/mapper.cpp +++ b/src/mapper.cpp @@ -3532,9 +3532,11 @@ Mapper::find_mems_simple(string::const_iterator seq_begin, mems.erase(std::remove_if(mems.begin(), mems.end(), [&smems_begin, &min_mem_length](const MaximalExactMatch& m) { - return m.end-m.begin == 0 - || m.length() < min_mem_length - || smems_begin[m.begin] != m.end; + return ( m.end-m.begin == 0 + || m.length() < min_mem_length + || smems_begin[m.begin] != m.end + || m.count_Ns() > 0 + ); }), mems.end()); // return the matches in natural order @@ -6721,6 +6723,11 @@ void MaximalExactMatch::fill_positions(Mapper* mapper) { } } +// counts Ns in the MEM +size_t MaximalExactMatch::count_Ns(void) const { + return std::count(begin, end, 'N'); +} + ostream& operator<<(ostream& out, const MaximalExactMatch& mem) { size_t len = mem.begin - mem.end; out << mem.sequence() << ":"; diff --git a/src/mapper.hpp b/src/mapper.hpp index 7606af6171b..65ed854ea99 100644 --- a/src/mapper.hpp +++ b/src/mapper.hpp @@ -54,6 +54,8 @@ class MaximalExactMatch { int length(void) const; // uses an xgindex to fill out the MEM positions void fill_positions(Mapper* mapper); + // tells if the MEM contains an N + size_t count_Ns(void) const; friend bool operator==(const MaximalExactMatch& m1, const MaximalExactMatch& m2); friend bool operator<(const MaximalExactMatch& m1, const MaximalExactMatch& m2);