| #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP |
| #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP |
| |
| #include <boost/property_tree/json_parser/error.hpp> |
| |
| #include <boost/core/ref.hpp> |
| #include <boost/bind/bind.hpp> |
| #include <boost/format.hpp> |
| |
| #include <iterator> |
| #include <sstream> |
| #include <string> |
| |
| namespace boost { namespace property_tree { |
| namespace json_parser { namespace detail |
| { |
| |
| template <typename Encoding, typename Iterator, typename Sentinel> |
| class source |
| { |
| public: |
| typedef typename std::iterator_traits<Iterator>::value_type |
| code_unit; |
| typedef bool (Encoding::*encoding_predicate)(code_unit c) const; |
| |
| explicit source(Encoding& encoding) : encoding(encoding) {} |
| |
| template <typename Range> |
| void set_input(const std::string& filename, const Range& r) |
| { |
| this->filename = filename; |
| cur = r.begin(); |
| end = r.end(); |
| // Note that there is no backtracking, so if e.g. a UTF-8 file |
| // starts with something that initially looks like a BOM but isn't, |
| // there's trouble. |
| // However, no valid JSON file can start with a UTF-8 EF byte. |
| encoding.skip_introduction(cur, end); |
| line = 1; |
| offset = 0; |
| } |
| |
| bool done() const { return cur == end; } |
| |
| void parse_error(const char* msg) { |
| BOOST_PROPERTY_TREE_THROW( |
| json_parser_error(msg, filename, line)); |
| } |
| |
| void next() { |
| if (encoding.is_nl(*cur)) { |
| ++line; |
| offset = 0; |
| } else { |
| ++offset; |
| } |
| ++cur; |
| } |
| |
| template <typename Action> |
| bool have(encoding_predicate p, Action& a) { |
| bool found = cur != end && (encoding.*p)(*cur); |
| if (found) { |
| a(*cur); |
| next(); |
| } |
| return found; |
| } |
| |
| bool have(encoding_predicate p) { |
| DoNothing n; |
| return have(p, n); |
| } |
| |
| template <typename Action> |
| void expect(encoding_predicate p, const char* msg, Action& a) { |
| if (!have(p, a)) { |
| parse_error(msg); |
| } |
| } |
| |
| void expect(encoding_predicate p, const char* msg) { |
| DoNothing n; |
| expect(p, msg, n); |
| } |
| |
| code_unit need_cur(const char* msg) { |
| if (cur == end) { |
| parse_error(msg); |
| } |
| return *cur; |
| } |
| |
| Iterator& raw_cur() { return cur; } |
| Sentinel raw_end() { return end; } |
| |
| private: |
| struct DoNothing { |
| void operator ()(code_unit) const {} |
| }; |
| |
| Encoding& encoding; |
| Iterator cur; |
| Sentinel end; |
| std::string filename; |
| int line; |
| int offset; |
| }; |
| |
// Number adapter selected for every iterator category except the one that
// has its own specialization. It remembers where the number started and,
// once the parser is done, hands the whole [start, position) range to the
// callbacks in a single on_number() call.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class number_callback_adapter
{
    // Declared but never defined: copying is not supported.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& handler;
    Encoding& codec;
    Iterator start;      // snapshot of the position at construction
    Iterator& position;  // live position, advanced by the caller

public:
    number_callback_adapter(Callbacks& cb, Encoding& enc, Iterator& pos)
        : handler(cb), codec(enc), start(pos), position(pos)
    {}

    // Individual code units are ignored; the range is converted at the end.
    void operator ()(typename Encoding::external_char) {}

    // Converts everything between the start snapshot and the current
    // position and reports it as one number.
    void finish() const {
        handler.on_number(codec.to_internal(start, position));
    }
};
| |
| template <typename Callbacks, typename Encoding, typename Iterator> |
| class number_callback_adapter<Callbacks, Encoding, Iterator, |
| std::input_iterator_tag> |
| { |
| public: |
| number_callback_adapter(Callbacks& callbacks, Encoding& encoding, |
| Iterator&) |
| : callbacks(callbacks), encoding(encoding), first(true) |
| {} |
| |
| void operator ()(typename Encoding::external_char c) { |
| if (first) { |
| callbacks.on_begin_number(); |
| first = false; |
| } |
| callbacks.on_digit(encoding.to_internal_trivial(c)); |
| } |
| |
| void finish() const { |
| callbacks.on_end_number(); |
| } |
| private: |
| number_callback_adapter(const number_callback_adapter&); |
| |
| Callbacks& callbacks; |
| Encoding& encoding; |
| bool first; |
| }; |
| |
// String adapter selected for every iterator category except the one that
// has its own specialization. Code points are merely skipped over; the
// accumulated stretch ("run") between two markers is converted and handed
// to the callbacks in one on_code_units() call.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class string_callback_adapter
{
    // Declared but never defined: copying is not supported.
    string_callback_adapter(const string_callback_adapter&);

    Callbacks& handler;
    Encoding& codec;
    Iterator& position;   // live position shared with the caller
    Iterator run_origin;  // where the current run started

public:
    string_callback_adapter(Callbacks& cb, Encoding& enc, Iterator& pos)
        : handler(cb), codec(enc), position(pos), run_origin(pos)
    {}

    // Marks the current position as the beginning of a new run.
    void start_run() {
        run_origin = position;
    }

    // Emits everything from the run's beginning up to the current position.
    void finish_run() {
        handler.on_code_units(codec.to_internal(run_origin, position));
    }

    // Lets the encoding step over one code point, advancing the shared
    // position; error_fn is passed through to the encoding.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        codec.skip_codepoint(position, end, error_fn);
    }
};
| |
| template <typename Callbacks, typename Encoding, typename Iterator> |
| class string_callback_adapter<Callbacks, Encoding, Iterator, |
| std::input_iterator_tag> |
| { |
| public: |
| string_callback_adapter(Callbacks& callbacks, Encoding& encoding, |
| Iterator& cur) |
| : callbacks(callbacks), encoding(encoding), cur(cur) |
| {} |
| |
| void start_run() {} |
| |
| void finish_run() {} |
| |
| template <typename Sentinel, typename EncodingErrorFn> |
| void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { |
| encoding.transcode_codepoint(cur, end, |
| boost::bind(&Callbacks::on_code_unit, |
| boost::ref(callbacks), boost::placeholders::_1), |
| error_fn); |
| } |
| |
| private: |
| string_callback_adapter(const string_callback_adapter&); |
| |
| Callbacks& callbacks; |
| Encoding& encoding; |
| Iterator& cur; |
| }; |
| |
| template <typename Callbacks, typename Encoding, typename Iterator, |
| typename Sentinel> |
| class parser |
| { |
| typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator> |
| number_adapter; |
| typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator> |
| string_adapter; |
| typedef detail::source<Encoding, Iterator, Sentinel> source; |
| typedef typename source::code_unit code_unit; |
| |
| public: |
| parser(Callbacks& callbacks, Encoding& encoding) |
| : callbacks(callbacks), encoding(encoding), src(encoding) |
| {} |
| |
| template <typename Range> |
| void set_input(const std::string& filename, const Range& r) { |
| src.set_input(filename, r); |
| } |
| |
| void finish() { |
| skip_ws(); |
| if (!src.done()) { |
| parse_error("garbage after data"); |
| } |
| } |
| |
| void parse_value() { |
| if (parse_object()) return; |
| if (parse_array()) return; |
| if (parse_string()) return; |
| if (parse_boolean()) return; |
| if (parse_null()) return; |
| if (parse_number()) return; |
| parse_error("expected value"); |
| } |
| |
| bool parse_null() { |
| skip_ws(); |
| if (!have(&Encoding::is_n)) { |
| return false; |
| } |
| expect(&Encoding::is_u, "expected 'null'"); |
| expect(&Encoding::is_l, "expected 'null'"); |
| expect(&Encoding::is_l, "expected 'null'"); |
| callbacks.on_null(); |
| return true; |
| } |
| |
| bool parse_boolean() { |
| skip_ws(); |
| if (have(&Encoding::is_t)) { |
| expect(&Encoding::is_r, "expected 'true'"); |
| expect(&Encoding::is_u, "expected 'true'"); |
| expect(&Encoding::is_e, "expected 'true'"); |
| callbacks.on_boolean(true); |
| return true; |
| } |
| if (have(&Encoding::is_f)) { |
| expect(&Encoding::is_a, "expected 'false'"); |
| expect(&Encoding::is_l, "expected 'false'"); |
| expect(&Encoding::is_s, "expected 'false'"); |
| expect(&Encoding::is_e, "expected 'false'"); |
| callbacks.on_boolean(false); |
| return true; |
| } |
| return false; |
| } |
| |
| bool parse_number() { |
| skip_ws(); |
| |
| number_adapter adapter(callbacks, encoding, src.raw_cur()); |
| bool started = false; |
| if (have(&Encoding::is_minus, adapter)) { |
| started = true; |
| } |
| if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) { |
| if (started) { |
| parse_error("expected digits after -"); |
| } |
| return false; |
| } |
| parse_frac_part(adapter); |
| parse_exp_part(adapter); |
| adapter.finish(); |
| return true; |
| } |
| |
| bool parse_string() { |
| skip_ws(); |
| |
| if (!have(&Encoding::is_quote)) { |
| return false; |
| } |
| |
| callbacks.on_begin_string(); |
| string_adapter adapter(callbacks, encoding, src.raw_cur()); |
| while (!encoding.is_quote(need_cur("unterminated string"))) { |
| if (encoding.is_backslash(*src.raw_cur())) { |
| adapter.finish_run(); |
| next(); |
| parse_escape(); |
| adapter.start_run(); |
| } else { |
| adapter.process_codepoint(src.raw_end(), |
| boost::bind(&parser::parse_error, |
| this, "invalid code sequence")); |
| } |
| } |
| adapter.finish_run(); |
| callbacks.on_end_string(); |
| next(); |
| return true; |
| } |
| |
| bool parse_array() { |
| skip_ws(); |
| |
| if (!have(&Encoding::is_open_bracket)) { |
| return false; |
| } |
| |
| callbacks.on_begin_array(); |
| skip_ws(); |
| if (have(&Encoding::is_close_bracket)) { |
| callbacks.on_end_array(); |
| return true; |
| } |
| do { |
| parse_value(); |
| skip_ws(); |
| } while (have(&Encoding::is_comma)); |
| expect(&Encoding::is_close_bracket, "expected ']' or ','"); |
| callbacks.on_end_array(); |
| return true; |
| } |
| |
| bool parse_object() { |
| skip_ws(); |
| |
| if (!have(&Encoding::is_open_brace)) { |
| return false; |
| } |
| |
| callbacks.on_begin_object(); |
| skip_ws(); |
| if (have(&Encoding::is_close_brace)) { |
| callbacks.on_end_object(); |
| return true; |
| } |
| do { |
| if (!parse_string()) { |
| parse_error("expected key string"); |
| } |
| skip_ws(); |
| expect(&Encoding::is_colon, "expected ':'"); |
| parse_value(); |
| skip_ws(); |
| } while (have(&Encoding::is_comma)); |
| expect(&Encoding::is_close_brace, "expected '}' or ','"); |
| callbacks.on_end_object(); |
| return true; |
| } |
| |
| private: |
| typedef typename source::encoding_predicate encoding_predicate; |
| |
| void parse_error(const char* msg) { src.parse_error(msg); } |
| void next() { src.next(); } |
| template <typename Action> |
| bool have(encoding_predicate p, Action& a) { return src.have(p, a); } |
| bool have(encoding_predicate p) { return src.have(p); } |
| template <typename Action> |
| void expect(encoding_predicate p, const char* msg, Action& a) { |
| src.expect(p, msg, a); |
| } |
| void expect(encoding_predicate p, const char* msg) { |
| src.expect(p, msg); |
| } |
| code_unit need_cur(const char* msg) { return src.need_cur(msg); } |
| |
| void skip_ws() { |
| while (have(&Encoding::is_ws)) { |
| } |
| } |
| |
| bool parse_int_part(number_adapter& action) { |
| if (!have(&Encoding::is_digit0, action)) { |
| return false; |
| } |
| parse_digits(action); |
| return true; |
| } |
| |
| void parse_frac_part(number_adapter& action) { |
| if (!have(&Encoding::is_dot, action)) { |
| return; |
| } |
| expect(&Encoding::is_digit, "need at least one digit after '.'", |
| action); |
| parse_digits(action); |
| } |
| |
| void parse_exp_part(number_adapter& action) { |
| if (!have(&Encoding::is_eE, action)) { |
| return; |
| } |
| have(&Encoding::is_plusminus, action); |
| expect(&Encoding::is_digit, "need at least one digit in exponent", |
| action); |
| parse_digits(action); |
| } |
| |
| void parse_digits(number_adapter& action) { |
| while (have(&Encoding::is_digit, action)) { |
| } |
| } |
| |
| void parse_escape() { |
| if (have(&Encoding::is_quote)) { |
| feed(0x22); |
| } else if (have(&Encoding::is_backslash)) { |
| feed(0x5c); |
| } else if (have(&Encoding::is_slash)) { |
| feed(0x2f); |
| } else if (have(&Encoding::is_b)) { |
| feed(0x08); // backspace |
| } else if (have(&Encoding::is_f)) { |
| feed(0x0c); // formfeed |
| } else if (have(&Encoding::is_n)) { |
| feed(0x0a); // line feed |
| } else if (have(&Encoding::is_r)) { |
| feed(0x0d); // carriage return |
| } else if (have(&Encoding::is_t)) { |
| feed(0x09); // horizontal tab |
| } else if (have(&Encoding::is_u)) { |
| parse_codepoint_ref(); |
| } else { |
| parse_error("invalid escape sequence"); |
| } |
| } |
| |
| unsigned parse_hex_quad() { |
| unsigned codepoint = 0; |
| for (int i = 0; i < 4; ++i) { |
| int value = encoding.decode_hexdigit( |
| need_cur("invalid escape sequence")); |
| if (value < 0) { |
| parse_error("invalid escape sequence"); |
| } |
| codepoint *= 16; |
| codepoint += value; |
| next(); |
| } |
| return codepoint; |
| } |
| |
| static bool is_surrogate_high(unsigned codepoint) { |
| return (codepoint & 0xfc00) == 0xd800; |
| } |
| static bool is_surrogate_low(unsigned codepoint) { |
| return (codepoint & 0xfc00) == 0xdc00; |
| } |
| static unsigned combine_surrogates(unsigned high, unsigned low) { |
| return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff)); |
| } |
| |
| void parse_codepoint_ref() { |
| unsigned codepoint = parse_hex_quad(); |
| if (is_surrogate_low(codepoint)) { |
| parse_error("invalid codepoint, stray low surrogate"); |
| } |
| if (is_surrogate_high(codepoint)) { |
| expect(&Encoding::is_backslash, |
| "invalid codepoint, stray high surrogate"); |
| expect(&Encoding::is_u, |
| "expected codepoint reference after high surrogate"); |
| int low = parse_hex_quad(); |
| if (!is_surrogate_low(low)) { |
| parse_error("expected low surrogate after high surrogate"); |
| } |
| codepoint = combine_surrogates(codepoint, low); |
| } |
| feed(codepoint); |
| } |
| |
| void feed(unsigned codepoint) { |
| encoding.feed_codepoint(codepoint, |
| boost::bind(&Callbacks::on_code_unit, |
| boost::ref(callbacks), boost::placeholders::_1)); |
| } |
| |
| Callbacks& callbacks; |
| Encoding& encoding; |
| source src; |
| }; |
| |
| }}}} |
| |
| #endif |