// blob: a213ae94c2a2f92234b43babe07c323c9518f255 [file] [log] [blame]
#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
#include <boost/assert.hpp>
#include <boost/range/iterator_range_core.hpp>
#include <utility>
namespace boost { namespace property_tree {
namespace json_parser { namespace detail
{
/// Character-classification helpers for input made of wchar_t code units.
///
/// Every member is a stateless test (or conversion) on a single wchar_t that
/// compares it against wide character literals. All members are const so the
/// helpers can be used through a const encoding object.
struct external_wide_encoding
{
    typedef wchar_t external_char;

    // --- Whitespace ------------------------------------------------------
    bool is_nl(wchar_t c) const { return c == L'\n'; }
    bool is_ws(wchar_t c) const {
        return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
    }

    // --- Number syntax ---------------------------------------------------
    bool is_minus(wchar_t c) const { return c == L'-'; }
    bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
    bool is_dot(wchar_t c) const { return c == L'.'; }
    bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
    bool is_0(wchar_t c) const { return c == L'0'; }
    bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
    // Note: despite the name, this matches the non-zero digits '1'..'9'.
    bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }

    // --- Structural and string punctuation -------------------------------
    bool is_quote(wchar_t c) const { return c == L'"'; }
    bool is_backslash(wchar_t c) const { return c == L'\\'; }
    bool is_slash(wchar_t c) const { return c == L'/'; }
    bool is_comma(wchar_t c) const { return c == L','; }
    bool is_open_bracket(wchar_t c) const { return c == L'['; }
    bool is_close_bracket(wchar_t c) const { return c == L']'; }
    bool is_colon(wchar_t c) const { return c == L':'; }
    bool is_open_brace(wchar_t c) const { return c == L'{'; }
    bool is_close_brace(wchar_t c) const { return c == L'}'; }

    // --- Individual lowercase letters ------------------------------------
    bool is_a(wchar_t c) const { return c == L'a'; }
    bool is_b(wchar_t c) const { return c == L'b'; }
    bool is_e(wchar_t c) const { return c == L'e'; }
    bool is_f(wchar_t c) const { return c == L'f'; }
    bool is_l(wchar_t c) const { return c == L'l'; }
    bool is_n(wchar_t c) const { return c == L'n'; }
    bool is_r(wchar_t c) const { return c == L'r'; }
    bool is_s(wchar_t c) const { return c == L's'; }
    bool is_t(wchar_t c) const { return c == L't'; }
    bool is_u(wchar_t c) const { return c == L'u'; }

    /// Converts one hexadecimal digit to its numeric value.
    ///
    /// @param c  Character to decode; '0'-'9', 'A'-'F' and 'a'-'f' are
    ///           accepted.
    /// @return   The value 0..15, or -1 if c is not a hexadecimal digit.
    ///
    /// Declared const like every other member: it reads no object state.
    int decode_hexdigit(wchar_t c) const {
        if (c >= L'0' && c <= L'9') return c - L'0';
        if (c >= L'A' && c <= L'F') return c - L'A' + 10;
        if (c >= L'a' && c <= L'f') return c - L'a' + 10;
        return -1;
    }
};
/// Empty tag type used for overload selection.
///
/// The two instantiations, is_utf16<true> and is_utf16<false>, are distinct
/// types, so passing an instance as a trailing argument picks an overload at
/// compile time. The type carries no data and has no members.
template <bool IsUtf16>
struct is_utf16
{
};
/// Encoding policy whose external and internal character type are both
/// wchar_t.
///
/// The public transcoding entry points forward to one of two private
/// overloads, chosen at compile time by the size of wchar_t: when
/// sizeof(wchar_t) == 2 the input is treated as UTF-16 and surrogate pairs
/// are checked; otherwise each wchar_t is handled as one complete unit.
class wide_wide_encoding : public external_wide_encoding
{
    // Tag type selecting the UTF-16 or the single-unit overloads below.
    typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
public:
    typedef wchar_t internal_char;

    /// Returns the range [first, last) unchanged, wrapped in an
    /// iterator_range; no conversion is needed because the external and
    /// internal character types are the same.
    template <typename Iterator>
    boost::iterator_range<Iterator>
    to_internal(Iterator first, Iterator last) const {
        return boost::make_iterator_range(first, last);
    }

    /// Returns c unchanged. Asserts (via BOOST_ASSERT) that c is not a
    /// surrogate code unit.
    wchar_t to_internal_trivial(wchar_t c) const {
        BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
        return c;
    }

    /// Advances cur past one code point, reporting encoding problems through
    /// error_fn and discarding the code units themselves.
    template <typename Iterator, typename Sentinel,
              typename EncodingErrorFn>
    void skip_codepoint(Iterator& cur, Sentinel end,
                        EncodingErrorFn error_fn) const {
        transcode_codepoint(cur, end, DoNothing(), error_fn);
    }

    /// Consumes one code point starting at cur and passes each of its code
    /// units to transcoded_fn. error_fn is invoked (with no arguments) when
    /// the input is rejected.
    ///
    /// NOTE(review): cur is dereferenced without first being compared to
    /// end, so callers presumably guarantee cur != end on entry - confirm.
    template <typename Iterator, typename Sentinel, typename TranscodedFn,
              typename EncodingErrorFn>
    void transcode_codepoint(Iterator& cur, Sentinel end,
            TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
        transcode_codepoint(cur, end, transcoded_fn, error_fn,
                            test_utf16());
    }

    /// Emits the wchar_t code unit(s) for codepoint through transcoded_fn:
    /// a single unit, or a surrogate pair on the UTF-16 path when
    /// codepoint >= 0x10000.
    template <typename TranscodedFn>
    void feed_codepoint(unsigned codepoint,
                        TranscodedFn transcoded_fn) const {
        feed_codepoint(codepoint, transcoded_fn, test_utf16());
    }

    /// Skips a leading 0xFEFF code unit, if there is one.
    template <typename Iterator, typename Sentinel>
    void skip_introduction(Iterator& cur, Sentinel end) const {
        // Endianness is already decoded at this level.
        if (cur != end && *cur == 0xfeff) {
            ++cur;
        }
    }

private:
    // Sink that ignores every code unit; used by skip_codepoint.
    struct DoNothing {
        void operator ()(wchar_t) const {}
    };

    // Single-unit path (wchar_t is not 16 bits wide): one wchar_t is one
    // code point. Values below 0x20 are reported through error_fn; the unit
    // is still forwarded and consumed if error_fn returns.
    template <typename Iterator, typename Sentinel, typename TranscodedFn,
              typename EncodingErrorFn>
    void transcode_codepoint(Iterator& cur, Sentinel,
                             TranscodedFn transcoded_fn,
                             EncodingErrorFn error_fn,
                             is_utf16<false>) const {
        wchar_t c = *cur;
        if (c < 0x20) {
            error_fn();
        }
        transcoded_fn(c);
        ++cur;
    }

    // UTF-16 path: consumes one code unit, plus a trailing low surrogate
    // when the first unit is a high surrogate. Reports through error_fn:
    // units below 0x20, a leading low surrogate, input ending after a high
    // surrogate, and a high surrogate not followed by a low surrogate.
    template <typename Iterator, typename Sentinel, typename TranscodedFn,
              typename EncodingErrorFn>
    void transcode_codepoint(Iterator& cur, Sentinel end,
                             TranscodedFn transcoded_fn,
                             EncodingErrorFn error_fn,
                             is_utf16<true>) const {
        wchar_t c = *cur;
        if (c < 0x20) {
            error_fn();
        }
        if (is_surrogate_low(c)) {
            error_fn();
        }
        transcoded_fn(c);
        ++cur;
        if (is_surrogate_high(c)) {
            if (cur == end) {
                error_fn();
                // If error_fn returns instead of throwing, stop here:
                // reading *cur at the end position would be undefined.
                return;
            }
            c = *cur;
            if (!is_surrogate_low(c)) {
                error_fn();
            }
            transcoded_fn(c);
            ++cur;
        }
    }

    // Single-unit path: the code point is emitted as one wchar_t.
    template <typename TranscodedFn>
    void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
                        is_utf16<false>) const {
        transcoded_fn(static_cast<wchar_t>(codepoint));
    }

    // UTF-16 path: code points below 0x10000 are emitted as one unit;
    // larger ones are split into a high (0xD800-based) and a low
    // (0xDC00-based) surrogate carrying ten bits each of codepoint - 0x10000.
    template <typename TranscodedFn>
    void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
                        is_utf16<true>) const {
        if (codepoint < 0x10000) {
            transcoded_fn(static_cast<wchar_t>(codepoint));
        } else {
            codepoint -= 0x10000;
            transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
            transcoded_fn(static_cast<wchar_t>(
                (codepoint & 0x3ff) | 0xdc00));
        }
    }

    // True when codepoint lies in 0xD800..0xDBFF. Every bit above the low
    // ten is compared, so values above 0xFFFF (possible when wchar_t is
    // wider than 16 bits) are never mistaken for surrogates.
    static bool is_surrogate_high(unsigned codepoint) {
        return (codepoint & ~0x3ffu) == 0xd800u;
    }

    // True when codepoint lies in 0xDC00..0xDFFF; same full-width
    // comparison as is_surrogate_high.
    static bool is_surrogate_low(unsigned codepoint) {
        return (codepoint & ~0x3ffu) == 0xdc00u;
    }
};
}}}}
#endif