Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix possible type confusion with boost::locale::collator #216

Merged
merged 10 commits into from
Feb 8, 2024
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_library(boost_locale
src/boost/locale/shared/localization_backend.cpp
src/boost/locale/shared/message.cpp
src/boost/locale/shared/mo_lambda.cpp
src/boost/locale/shared/std_collate_adapter.hpp
src/boost/locale/util/codecvt_converter.cpp
src/boost/locale/util/default_locale.cpp
src/boost/locale/util/encoding.cpp
Expand Down
5 changes: 4 additions & 1 deletion doc/changelog.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2021-2023 Alexander Grund
// Copyright (c) 2021-2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

/*!
\page changelog Changelog

- 1.85.0
- Breaking changes
- `collator` no longer derives from `std::collate`, avoiding possible type confusion
- 1.84.0
- Breaking changes
- `to_title` for the WinAPI backend returns the string unchanged instead of an empty string
Expand Down
61 changes: 29 additions & 32 deletions include/boost/locale/collator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define BOOST_LOCALE_COLLATOR_HPP_INCLUDED

#include <boost/locale/config.hpp>
#include <boost/locale/detail/facet_id.hpp>
#include <locale>

#ifdef BOOST_MSVC
Expand Down Expand Up @@ -43,17 +44,16 @@ namespace boost { namespace locale {

/// \brief Collation facet.
///
/// It reimplements standard C++ std::collate,
/// allowing usage of std::locale for direct string comparison
/// It reimplements standard C++ std::collate with support for collation levels
template<typename CharType>
class collator : public std::collate<CharType> {
class BOOST_SYMBOL_VISIBLE collator : public std::locale::facet, public detail::facet_id<collator<CharType>> {
public:
/// Type of the underlying character
typedef CharType char_type;
/// Type of string used with this facet
typedef std::basic_string<CharType> string_type;

/// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare
/// Compare two strings in range [b1,e1), [b2,e2) according to collation level \a level. Calls do_compare
///
/// Returns -1 if the first of the two strings sorts before the second, returns 1 if it sorts after and 0 if
/// they are considered equal.
Expand All @@ -66,6 +66,13 @@ namespace boost { namespace locale {
return do_compare(level, b1, e1, b2, e2);
}

/// Compare two strings in range [b1,e1), [b2,e2) without an explicit collation level, using the same
/// parameter list as std::collate::compare.
///
/// Forwards to the level-aware compare overload with collate_level::identical and returns its result.
int compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const
{
return compare(collate_level::identical, b1, e1, b2, e2);
}

/// Create a binary string that can be compared to other in order to get collation order. The string is created
/// for text in range [b,e). It is useful for collation of multiple strings for text.
///
Expand All @@ -80,13 +87,24 @@ namespace boost { namespace locale {
return do_transform(level, b, e);
}

/// Create a sort key for the text in range [b,e) without an explicit collation level, using the same
/// parameter list as std::collate::transform.
///
/// Forwards to the level-aware transform overload with collate_level::identical and returns its result.
string_type transform(const char_type* b, const char_type* e) const
{
return transform(collate_level::identical, b, e);
}

/// Calculate a hash of the text in range [b,e) for collation level \a level. The value can be used for
/// collation sensitive string comparison.
///
/// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
///
/// Calls do_hash with the same arguments and returns its result.
long hash(collate_level level, const char_type* b, const char_type* e) const { return do_hash(level, b, e); }

/// Calculate a hash of the text in range [b,e) without an explicit collation level, using the same
/// parameter list as std::collate::hash.
///
/// Forwards to the level-aware hash overload with collate_level::identical and returns its result.
long hash(const char_type* b, const char_type* e) const { return hash(collate_level::identical, b, e); }

/// Compare two strings \a l and \a r using collation level \a level
///
/// Returns -1 if the first of the two strings sorts before the second, returns 1 if it sorts after and 0 if
Expand All @@ -107,7 +125,7 @@ namespace boost { namespace locale {
/// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple
/// strings.
///
/// The transformation follows these rules:
/// The transformation follows this rule:
/// \code
/// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
/// \endcode
Expand All @@ -118,29 +136,7 @@ namespace boost { namespace locale {

protected:
/// constructor of the collator object
collator(size_t refs = 0) : std::collate<CharType>(refs) {}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
int
do_compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const override
{
return do_compare(collate_level::identical, b1, e1, b2, e2);
}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
string_type do_transform(const char_type* b, const char_type* e) const override
{
return do_transform(collate_level::identical, b, e);
}

/// This function is used to override default collation function that does not take in account collation level.
/// Uses primary level
long do_hash(const char_type* b, const char_type* e) const override
{
return do_hash(collate_level::identical, b, e);
}
collator(size_t refs = 0) : std::locale::facet(refs) {}

/// Actual function that performs comparison between the strings. For details see compare member function. Can
/// be overridden.
Expand All @@ -157,7 +153,7 @@ namespace boost { namespace locale {
};

/// \brief This class can be used in STL algorithms and containers for comparison of strings
/// with a level other than primary
/// with a level other than identical
///
/// For example:
///
Expand All @@ -169,21 +165,22 @@ namespace boost { namespace locale {
template<typename CharType, collate_level default_level = collate_level::identical>
struct comparator {
public:
/// Create a comparator class for locale \a l and with collation leval \a level
/// Create a comparator class for locale \a l and with collation level \a level
///
/// \throws std::bad_cast: \a l does not have \ref collator facet installed
comparator(const std::locale& l = std::locale(), collate_level level = default_level) :
locale_(l), level_(level)
locale_(l), collator_(std::use_facet<collator<CharType>>(locale_)), level_(level)
{}

/// Compare two strings -- equivalent to return left < right according to collation rules
bool operator()(const std::basic_string<CharType>& left, const std::basic_string<CharType>& right) const
{
return std::use_facet<collator<CharType>>(locale_).compare(level_, left, right) < 0;
return collator_.compare(level_, left, right) < 0;
}

private:
std::locale locale_;
const collator<CharType>& collator_;
collate_level level_;
};

Expand Down
49 changes: 25 additions & 24 deletions src/boost/locale/icu/collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
#include "boost/locale/icu/icu_util.hpp"
#include "boost/locale/icu/uconv.hpp"
#include "boost/locale/shared/mo_hash.hpp"
#include "boost/locale/shared/std_collate_adapter.hpp"
#include <boost/thread.hpp>
#include <limits>
#include <memory>
#include <unicode/coll.h>
#include <vector>
#if BOOST_LOCALE_ICU_VERSION >= 402
Expand Down Expand Up @@ -51,7 +53,7 @@
{
icu::StringPiece left(b1, e1 - b1);
icu::StringPiece right(b2, e2 - b2);
return get_collator(level)->compareUTF8(left, right, status);
return get_collator(level).compareUTF8(left, right, status);
}
#endif

Expand All @@ -64,7 +66,7 @@
{
icu::UnicodeString left = cvt_.icu(b1, e1);
icu::UnicodeString right = cvt_.icu(b2, e2);
return get_collator(level)->compare(left, right, status);
return get_collator(level).compare(left, right, status);
}

int do_real_compare(collate_level level,
Expand Down Expand Up @@ -101,11 +103,11 @@
icu::UnicodeString str = cvt_.icu(b, e);
std::vector<uint8_t> tmp;
tmp.resize(str.length() + 1u);
icu::Collator* collate = get_collator(level);
const int len = collate->getSortKey(str, tmp.data(), tmp.size());
icu::Collator& collate = get_collator(level);
const int len = collate.getSortKey(str, tmp.data(), tmp.size());
if(len > int(tmp.size())) {
tmp.resize(len);
collate->getSortKey(str, tmp.data(), tmp.size());
collate.getSortKey(str, tmp.data(), tmp.size());
} else
tmp.resize(len);
return tmp;
Expand All @@ -126,7 +128,7 @@

collate_impl(const cdata& d) : cvt_(d.encoding()), locale_(d.locale()), is_utf8_(d.is_utf8()) {}

icu::Collator* get_collator(collate_level level) const
icu::Collator& get_collator(collate_level level) const
{
const int lvl_idx = level_to_int(level);
constexpr icu::Collator::ECollationStrength levels[level_count] = {icu::Collator::PRIMARY,
Expand All @@ -136,18 +138,17 @@
icu::Collator::IDENTICAL};

icu::Collator* col = collates_[lvl_idx].get();
if(col)
return col;

UErrorCode status = U_ZERO_ERROR;

collates_[lvl_idx].reset(icu::Collator::createInstance(locale_, status));

if(U_FAILURE(status))
throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));

collates_[lvl_idx]->setStrength(levels[lvl_idx]);
return collates_[lvl_idx].get();
if(!col) {
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::Collator> tmp_col(icu::Collator::createInstance(locale_, status));
if(U_FAILURE(status))
throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));

Check warning on line 145 in src/boost/locale/icu/collator.cpp

View check run for this annotation

Codecov / codecov/patch

src/boost/locale/icu/collator.cpp#L145

Added line #L145 was not covered by tests

tmp_col->setStrength(levels[lvl_idx]);
col = tmp_col.release();
collates_[lvl_idx].reset(col);
}
return *col;
}

private:
Expand All @@ -173,21 +174,21 @@
return do_ustring_compare(level, b1, e1, b2, e2, status);
}
#endif

std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type)
{
switch(type) {
case char_facet_t::nochar: break;
case char_facet_t::char_f: return std::locale(in, new collate_impl<char>(cd));
case char_facet_t::wchar_f: return std::locale(in, new collate_impl<wchar_t>(cd));
case char_facet_t::char_f: return impl::create_collators<char, collate_impl>(in, cd);
case char_facet_t::wchar_f: return impl::create_collators<wchar_t, collate_impl>(in, cd);
#ifdef __cpp_char8_t
case char_facet_t::char8_f: break; // std-facet not available (yet)
case char_facet_t::char8_f:
return std::locale(in, new collate_impl<char8_t>(cd)); // std-facet not available (yet)
#endif
#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
case char_facet_t::char16_f: return std::locale(in, new collate_impl<char16_t>(cd));
case char_facet_t::char16_f: return impl::create_collators<char16_t, collate_impl>(in, cd);
#endif
#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
case char_facet_t::char32_f: return std::locale(in, new collate_impl<char32_t>(cd));
case char_facet_t::char32_f: return impl::create_collators<char32_t, collate_impl>(in, cd);
#endif
}
return in;
Expand Down
3 changes: 3 additions & 0 deletions src/boost/locale/shared/ids.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand All @@ -24,6 +25,7 @@ namespace boost { namespace locale {
BOOST_LOCALE_DEFINE_ID(calendar_facet);

// Expands BOOST_LOCALE_DEFINE_ID once for each facet template that is parameterised on the character
// type: collator, converter, message_format and boundary::boundary_indexing, all with CHARTYPE.
#define BOOST_LOCALE_INSTANTIATE(CHARTYPE) \
BOOST_LOCALE_DEFINE_ID(collator<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(converter<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(message_format<CHARTYPE>); \
BOOST_LOCALE_DEFINE_ID(boundary::boundary_indexing<CHARTYPE>);
Expand All @@ -48,6 +50,7 @@ namespace boost { namespace locale {
void init_by(const std::locale& l)
{
init_facet<boundary::boundary_indexing<Char>>(l);
init_facet<collator<Char>>(l);
init_facet<converter<Char>>(l);
init_facet<message_format<Char>>(l);
}
Expand Down
2 changes: 1 addition & 1 deletion src/boost/locale/shared/mo_lambda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace boost { namespace locale { namespace gnu_gettext { namespace lambda {

namespace { // anon
template<class TExp, typename... Ts>
expr_ptr make_expr(Ts... ts)
expr_ptr make_expr(Ts&&... ts)
{
return expr_ptr(new TExp(std::forward<Ts>(ts)...));
}
Expand Down
58 changes: 58 additions & 0 deletions src/boost/locale/shared/std_collate_adapter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP
#define BOOST_LOCALE_STD_COLLATE_ADAPTER_HPP

#include <boost/locale/collator.hpp>
#include <locale>
#include <utility>

namespace boost { namespace locale { namespace impl {

/// \brief Adapter exposing a collator implementation through the std::collate interface.
///
/// The adapter owns an instance of \a Base and routes the three std::collate hooks
/// (do_compare, do_transform, do_hash) to the level-aware members of that instance,
/// always passing collate_level::identical.
///
/// \tparam CharT Character type of the std::collate facet this class derives from
/// \tparam Base  Wrapped implementation type; it must be constructible from the arguments given to the
///               adapter's constructor and provide compare/transform/hash taking a collate_level first
template<typename CharT, class Base>
class BOOST_SYMBOL_VISIBLE std_collate_adapter : public std::collate<CharT> {
public:
    using typename std::collate<CharT>::string_type;

    /// Construct the wrapped implementation from \a args (perfectly forwarded)
    template<typename... TArgs>
    explicit std_collate_adapter(TArgs&&... args) : base_(std::forward<TArgs>(args)...)
    {}

protected:
    /// std::collate hook: compare [first1,last1) with [first2,last2) using the wrapped implementation
    int do_compare(const CharT* first1, const CharT* last1, const CharT* first2, const CharT* last2) const override
    {
        return base_.compare(collate_level::identical, first1, last1, first2, last2);
    }

    /// std::collate hook: create the sort key for [first,last) using the wrapped implementation
    string_type do_transform(const CharT* first, const CharT* last) const override
    {
        return base_.transform(collate_level::identical, first, last);
    }

    /// std::collate hook: hash [first,last) using the wrapped implementation
    long do_hash(const CharT* first, const CharT* last) const override
    {
        return base_.hash(collate_level::identical, first, last);
    }

    /// The wrapped implementation all hooks delegate to
    Base base_;
};

template<typename CharType, class CollatorImpl, typename... TArgs>
static std::locale create_collators(const std::locale& in, TArgs&&... args)
{
static_assert(std::is_base_of<collator<CharType>, CollatorImpl>::value, "Must be a collator implementation");
std::locale res(in, new CollatorImpl(args...));
return std::locale(res, new std_collate_adapter<CharType, CollatorImpl>(args...));
}

/// Convenience overload accepting the implementation as a class template: instantiates it with
/// \a CharType and delegates to the overload taking the implementation as a type.
template<typename CharType, template<typename> class CollatorImpl, typename... TArgs>
static std::locale create_collators(const std::locale& in, TArgs&&... args)
{
    using impl_type = CollatorImpl<CharType>;
    return create_collators<CharType, impl_type>(in, args...);
}

}}} // namespace boost::locale::impl

#endif
2 changes: 1 addition & 1 deletion src/boost/locale/win32/all_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace boost { namespace locale { namespace impl_win {

class winlocale;
struct winlocale;

std::locale create_convert(const std::locale& in, const winlocale& lc, char_facet_t type);

Expand Down
Loading
Loading