Skip to content

Commit

Permalink
Yet another refactoring of codec iterators.
Browse files Browse the repository at this point in the history
Martinho Fernandes committed Aug 31, 2012
1 parent 4bdc733 commit f8d905f
Showing 10 changed files with 78 additions and 136 deletions.
28 changes: 5 additions & 23 deletions include/ogonek/encoding/encoding_scheme.h++
Original file line number Diff line number Diff line change
@@ -84,21 +84,13 @@ namespace ogonek {
using state = typename EncodingForm::state;
using code_unit = ogonek::byte;

template <typename SinglePassRange,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename EncodingIterator = encoding_iterator<encoding_scheme<EncodingForm, ByteOrder>, Iterator>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r) {
return boost::make_iterator_range(
EncodingIterator { boost::begin(r), boost::end(r) },
EncodingIterator { boost::end(r), boost::end(r) });
}
template <typename SinglePassRange,
template <typename SinglePassRange, typename ValidationCallback,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename DecodingIterator = decoding_iterator<encoding_scheme<EncodingForm, ByteOrder>, Iterator>>
static boost::iterator_range<DecodingIterator> decode(SinglePassRange const& r) {
typename EncodingIterator = encoding_iterator<encoding_scheme<EncodingForm, ByteOrder>, Iterator, ValidationCallback>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r, ValidationCallback&& callback) {
return boost::make_iterator_range(
DecodingIterator { boost::begin(r), boost::end(r) },
DecodingIterator { boost::end(r), boost::end(r) });
EncodingIterator { boost::begin(r), boost::end(r), callback },
EncodingIterator { boost::end(r), boost::end(r), callback });
}

template <typename SinglePassRange, typename ValidationCallback,
@@ -120,16 +112,6 @@ namespace ogonek {
}
return { result, std::size_t(out - result.begin()) };
}
template <typename SinglePassRange>
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state& s) {
using code_unit_range = encoding_scheme_detail::byte_ordered_range<ByteOrder, typename EncodingForm::code_unit, SinglePassRange>;
using iterator = typename boost::range_iterator<code_unit_range>::type;
code_unit_range range {
iterator { boost::begin(r) }, iterator { boost::end(r) }
};
auto remaining = EncodingForm::decode_one(range, out, s);
return { remaining.begin().it, r.end() };
}
template <typename SinglePassRange, typename ValidationCallback>
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state& s, ValidationCallback&& callback) {
using code_unit_range = encoding_scheme_detail::byte_ordered_range<ByteOrder, typename EncodingForm::code_unit, SinglePassRange>;
59 changes: 20 additions & 39 deletions include/ogonek/encoding/iterator.h++
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@

#include "../traits.h++"
#include "../types.h++"
#include "../validation.h++"

#include <boost/iterator/iterator_facade.hpp>
#include <boost/range/iterator_range.hpp>
@@ -54,17 +55,27 @@ namespace ogonek {
std::array<T, N> array;
};

template <typename EncodingForm, typename Iterator>
//! Overload taking the `skip_validation` tag in place of a callback: it has an
//! empty body, so the code point is neither checked nor modified.
inline void validate(codepoint&, decltype(skip_validation)) {}
//! Tells whether `u` falls in the surrogate block, U+D800 through U+DFFF
//! (both ends included).
inline bool is_surrogate(codepoint u) {
    return !(u < 0xD800 || u > 0xDFFF);
}
//! Checks a single code point before it is encoded.
//!
//! When `u` is greater than 0x10FFFF or is a surrogate, `callback` is invoked
//! with `validation_result::illegal`, a one-element range holding a copy of
//! `u`, and `u` itself by reference (so the callback may overwrite it).
//! Otherwise nothing happens.
template <typename Callback>
void validate(codepoint& u, Callback&& callback) {
    bool const acceptable = u <= 0x10FFFF && !is_surrogate(u);
    if(acceptable) return;

    // The callback interface expects a sub_range, so wrap the offending
    // code point in a one-element list.
    auto offending = { u };
    callback(validation_result::illegal, boost::sub_range<decltype(offending)>(offending), u); // TODO: how to use the result?
}

template <typename EncodingForm, typename Iterator, typename ValidationCallback>
struct encoding_iterator
: boost::iterator_facade<
encoding_iterator<EncodingForm, Iterator>,
encoding_iterator<EncodingForm, Iterator, ValidationCallback>,
CodeUnit<EncodingForm>,
std::input_iterator_tag, // TODO
CodeUnit<EncodingForm>
> {
public:
encoding_iterator(Iterator first, Iterator last)
: first(first), last(last) {
encoding_iterator(Iterator first, Iterator last, ValidationCallback callback)
: first(first), last(last), callback(std::forward<ValidationCallback>(callback)) {
encode_next();
}

@@ -86,20 +97,23 @@ namespace ogonek {
private:
void encode_next() {
if(first != last) {
encoded = EncodingForm::encode_one(*first++, state);
auto u = *first++;
validate(u, callback);
encoded = EncodingForm::encode_one(u, state);
current = 0;
} else {
current = -1;
}
}

Iterator first, last;
typename std::decay<ValidationCallback>::type callback;
typename EncodingForm::state state {};
partial_array<CodeUnit<EncodingForm>, EncodingForm::max_width> encoded {};
int current;
};

template <typename EncodingForm, typename Iterator, typename ValidationCallback = void>
template <typename EncodingForm, typename Iterator, typename ValidationCallback>
struct decoding_iterator
: boost::iterator_facade<
decoding_iterator<EncodingForm, Iterator, ValidationCallback>,
@@ -132,39 +146,6 @@ namespace ogonek {
typename std::decay<ValidationCallback>::type callback;
typename EncodingForm::state state {};
};

//! Specialization of decoding_iterator for a `void` validation callback.
//!
//! Walks the code units in [first, last) and yields code points by calling the
//! three-argument `EncodingForm::decode_one(range, out, state)`; no callback is
//! stored or invoked.
template <typename EncodingForm, typename Iterator>
struct decoding_iterator<EncodingForm, Iterator, void>
: boost::iterator_facade<
    decoding_iterator<EncodingForm, Iterator>,
    codepoint,
    std::input_iterator_tag, // TODO
    codepoint
  > {
public:
    using range = boost::iterator_range<Iterator>;

    //! Starts at `first`, with `last` marking the end of the code units and a
    //! value-initialized decoder state.
    decoding_iterator(Iterator first, Iterator last)
    : first(first),
      last(last),
      state{} {}

    //! Returns the code point produced by decode_one for the current position.
    //! Works on a copy of the decoder state, so the iterator itself is left
    //! untouched.
    codepoint dereference() const {
        auto scratch_state = state;
        boost::sub_range<range> const remaining(first, last);
        codepoint decoded;
        EncodingForm::decode_one(remaining, decoded, scratch_state);
        return decoded;
    }

    //! Two iterators compare equal when they sit at the same position, or when
    //! each of them has reached its own end.
    bool equal(decoding_iterator const& that) const {
        if(first == that.first) return true;
        bool const this_exhausted = first == last;
        return this_exhausted && that.first == that.last;
    }

    //! Moves `first` to the start of whatever decode_one leaves unconsumed,
    //! updating the stored decoder state; the decoded value is thrown away.
    void increment() {
        boost::sub_range<range> const remaining(first, last);
        codepoint discarded;
        first = EncodingForm::decode_one(remaining, discarded, state).begin();
    }

private:
    Iterator first, last;
    typename EncodingForm::state state;
};
} // namespace ogonek

#endif // OGONEK_ENCODING_ITERATOR_HPP
20 changes: 6 additions & 14 deletions include/ogonek/encoding/utf16.h++
Original file line number Diff line number Diff line change
@@ -34,21 +34,13 @@ namespace ogonek {
static constexpr bool is_self_synchronizing = true;
struct state {};

template <typename SinglePassRange,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename EncodingIterator = encoding_iterator<utf16, Iterator>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r) {
return boost::make_iterator_range(
EncodingIterator { boost::begin(r), boost::end(r) },
EncodingIterator { boost::end(r), boost::end(r) });
}
template <typename SinglePassRange,
template <typename SinglePassRange, typename ValidationCallback,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename DecodingIterator = decoding_iterator<utf16, Iterator>>
static boost::iterator_range<DecodingIterator> decode(SinglePassRange const& r) {
typename EncodingIterator = encoding_iterator<utf16, Iterator, ValidationCallback>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r, ValidationCallback&& callback) {
return boost::make_iterator_range(
DecodingIterator { boost::begin(r), boost::end(r) },
DecodingIterator { boost::end(r), boost::end(r) });
EncodingIterator { boost::begin(r), boost::end(r), callback },
EncodingIterator { boost::end(r), boost::end(r), callback });
}

template <typename SinglePassRange, typename ValidationCallback,
@@ -79,7 +71,7 @@ namespace ogonek {
//! True when `u` is in the surrogate range U+D800..U+DFFF, inclusive.
static bool is_surrogate(codepoint u) {
    return 0xD800 <= u && u <= 0xDFFF;
}

template <typename SinglePassRange>
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&) {
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&, decltype(skip_validation)) {
auto first = boost::begin(r);
auto lead = *first++;
if(!is_surrogate(lead)) {
20 changes: 6 additions & 14 deletions include/ogonek/encoding/utf32.h++
Original file line number Diff line number Diff line change
@@ -30,21 +30,13 @@ namespace ogonek {
static constexpr bool is_self_synchronizing = true;
struct state {};

template <typename SinglePassRange,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename EncodingIterator = encoding_iterator<utf32, Iterator>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r) {
return boost::make_iterator_range(
EncodingIterator { boost::begin(r), boost::end(r) },
EncodingIterator { boost::end(r), boost::end(r) });
}
template <typename SinglePassRange,
template <typename SinglePassRange, typename ValidationCallback,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename DecodingIterator = decoding_iterator<utf32, Iterator>>
static boost::iterator_range<DecodingIterator> decode(SinglePassRange const& r) {
typename EncodingIterator = encoding_iterator<utf32, Iterator, ValidationCallback>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r, ValidationCallback&& callback) {
return boost::make_iterator_range(
DecodingIterator { boost::begin(r), boost::end(r) },
DecodingIterator { boost::end(r), boost::end(r) });
EncodingIterator { boost::begin(r), boost::end(r), callback },
EncodingIterator { boost::end(r), boost::end(r), callback });
}

template <typename SinglePassRange, typename ValidationCallback,
@@ -60,7 +52,7 @@ namespace ogonek {
return {{ u }};
}
template <typename SinglePassRange>
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&) {
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&, decltype(skip_validation)) {
auto first = boost::begin(r);
out = *first++;
return { first, boost::end(r) };
21 changes: 6 additions & 15 deletions include/ogonek/encoding/utf8.h++
Original file line number Diff line number Diff line change
@@ -35,23 +35,14 @@ namespace ogonek {
static constexpr bool is_self_synchronizing = true;
struct state {};

template <typename SinglePassRange,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename EncodingIterator = encoding_iterator<utf8, Iterator>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r) {
return boost::make_iterator_range(
EncodingIterator { boost::begin(r), boost::end(r) },
EncodingIterator { boost::end(r), boost::end(r) });
}
template <typename SinglePassRange,
template <typename SinglePassRange, typename ValidationCallback,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename DecodingIterator = decoding_iterator<utf8, Iterator>>
static boost::iterator_range<DecodingIterator> decode(SinglePassRange const& r) {
typename EncodingIterator = encoding_iterator<utf8, Iterator, ValidationCallback>>
static boost::iterator_range<EncodingIterator> encode(SinglePassRange const& r, ValidationCallback&& callback) {
return boost::make_iterator_range(
DecodingIterator { boost::begin(r), boost::end(r) },
DecodingIterator { boost::end(r), boost::end(r) });
EncodingIterator { boost::begin(r), boost::end(r), callback },
EncodingIterator { boost::end(r), boost::end(r), callback });
}

template <typename SinglePassRange, typename ValidationCallback,
typename Iterator = typename boost::range_const_iterator<SinglePassRange>::type,
typename DecodingIterator = decoding_iterator<utf8, Iterator, ValidationCallback>>
@@ -104,7 +95,7 @@ namespace ogonek {
return ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
}
template <typename SinglePassRange>
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&) {
static boost::sub_range<SinglePassRange> decode_one(SinglePassRange const& r, codepoint& out, state&, decltype(skip_validation)) {
auto first = boost::begin(r);
byte b0 = *first++;
auto length = sequence_length(b0);
18 changes: 8 additions & 10 deletions include/ogonek/text.h++
Original file line number Diff line number Diff line change
@@ -100,14 +100,12 @@ namespace ogonek {

//! Construct from a codepoint range, with validation callback
template <typename CodepointRange, typename ValidationCallback>
basic_text(CodepointRange const& range, ValidationCallback&& /*callback*/)
: basic_text(direct{}, EncodingForm::encode(range)) { // TODO use callback!
basic_text(CodepointRange const& range, ValidationCallback&& callback)
: basic_text(direct{}, EncodingForm::encode(range, std::forward<ValidationCallback>(callback))) {
static_assert(std::is_same<detail::RangeValueType<CodepointRange>, codepoint>::value,
"Can only construct text from a range of codepoints");
}

// -- code units

// -- storage
//! Construct from an underlying container
explicit basic_text(Container storage)
@@ -116,13 +114,13 @@ namespace ogonek {

//** Range **

using iterator = decoding_iterator<EncodingForm, typename Container::iterator>;
using const_iterator = decoding_iterator<EncodingForm, typename Container::const_iterator>;
using iterator = decoding_iterator<EncodingForm, typename Container::iterator, decltype(skip_validation)>;
using const_iterator = decoding_iterator<EncodingForm, typename Container::const_iterator, decltype(skip_validation)>;

iterator begin() { return iterator { storage_.begin(), storage_.end() }; }
iterator end() { return iterator { storage_.end(), storage_.end() }; }
const_iterator begin() const { return const_iterator { storage_.begin(), storage_.end() }; }
const_iterator end() const { return const_iterator { storage_.end(), storage_.end() }; }
iterator begin() { return iterator { storage_.begin(), storage_.end(), skip_validation }; }
iterator end() { return iterator { storage_.end(), storage_.end(), skip_validation }; }
const_iterator begin() const { return const_iterator { storage_.begin(), storage_.end(), skip_validation }; }
const_iterator end() const { return const_iterator { storage_.end(), storage_.end(), skip_validation }; }

//** Interoperation **

14 changes: 8 additions & 6 deletions include/ogonek/validation.h++
Original file line number Diff line number Diff line change
@@ -32,27 +32,29 @@ namespace ogonek {
}
};

struct {
constexpr struct {
template <typename Range>
boost::sub_range<Range> operator()(validation_result, boost::sub_range<Range> const&, codepoint&) const {
throw validation_error();
}
} constexpr throw_validation_error = {};
} throw_validation_error = {};

struct {
constexpr struct {
template <typename Range>
boost::sub_range<Range> operator()(validation_result, boost::sub_range<Range> const& source, codepoint& out) const {
out = U'\xFFFD';
return { std::next(boost::begin(source)), boost::end(source) };
}
} constexpr use_replacement_character = {};
} use_replacement_character = {};

struct {
constexpr struct {
template <typename Range>
boost::sub_range<Range> operator()(validation_result, boost::sub_range<Range> const& source, codepoint&) const {
return { std::next(boost::begin(source)), boost::end(source) };
}
} constexpr ignore_errors = {};
} ignore_errors = {};

//! Tag object of the empty type `skip_validation_t`. It is passed where a
//! validation callback would otherwise go; functions declare a parameter of
//! type `decltype(skip_validation)` to provide an overload for that case.
constexpr struct skip_validation_t {} skip_validation = {};
} // namespace ogonek

#endif // OGONEK_VALIDATION_HPP
Loading

0 comments on commit f8d905f

Please sign in to comment.