| /* |
| * |
| * Copyright (c) 1998-2002 |
| * John Maddock |
| * |
| * Use, modification and distribution are subject to the |
| * Boost Software License, Version 1.0. (See accompanying file |
| * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| * |
| */ |
| |
| /* |
| * LOCATION: see http://www.boost.org for most recent version. |
| * FILE regex_format.hpp |
| * VERSION see <boost/version.hpp> |
| * DESCRIPTION: Provides formatting output routines for search and replace |
| * operations. Note this is an internal header file included |
| * by regex.hpp, do not include on its own. |
| */ |
| |
| #ifndef BOOST_REGEX_FORMAT_HPP |
| #define BOOST_REGEX_FORMAT_HPP |
| |
| |
| namespace boost{ |
| |
| #ifdef BOOST_MSVC |
| #pragma warning(push) |
| #pragma warning(disable: 4103) |
| #endif |
| #ifdef BOOST_HAS_ABI_HEADERS |
| # include BOOST_ABI_PREFIX |
| #endif |
| #ifdef BOOST_MSVC |
| #pragma warning(pop) |
| #endif |
| |
| // |
| // Forward declaration: |
| // |
| template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type > |
| class match_results; |
| |
| namespace re_detail{ |
| |
| // |
| // struct trivial_format_traits: |
| // defines minimum localisation support for formatting |
| // in the case that the actual regex traits is unavailable. |
| // |
| template <class charT> |
| struct trivial_format_traits |
| { |
| typedef charT char_type; |
| |
| static std::ptrdiff_t length(const charT* p) |
| { |
| return global_length(p); |
| } |
| static charT tolower(charT c) |
| { |
| return ::boost::re_detail::global_lower(c); |
| } |
| static charT toupper(charT c) |
| { |
| return ::boost::re_detail::global_upper(c); |
| } |
| static int value(const charT c, int radix) |
| { |
| int result = global_value(c); |
| return result >= radix ? -1 : result; |
| } |
| int toi(const charT*& p1, const charT* p2, int radix)const |
| { |
| return global_toi(p1, p2, radix, *this); |
| } |
| }; |
| |
| template <class OutputIterator, class Results, class traits> |
| class basic_regex_formatter |
| { |
| public: |
| typedef typename traits::char_type char_type; |
| basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) |
| : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} |
| OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f); |
| OutputIterator format(const char_type* p1, match_flag_type f) |
| { |
| return format(p1, p1 + m_traits.length(p1), f); |
| } |
| private: |
| typedef typename Results::value_type sub_match_type; |
| enum output_state |
| { |
| output_copy, |
| output_next_lower, |
| output_next_upper, |
| output_lower, |
| output_upper, |
| output_none |
| }; |
| |
| void put(char_type c); |
| void put(const sub_match_type& sub); |
| void format_all(); |
| void format_perl(); |
| void format_escape(); |
| void format_conditional(); |
| void format_until_scope_end(); |
| bool handle_perl_verb(bool have_brace); |
| |
| const traits& m_traits; // the traits class for localised formatting operations |
| const Results& m_results; // the match_results being used. |
| OutputIterator m_out; // where to send output. |
| const char_type* m_position; // format string, current position |
| const char_type* m_end; // format string end |
| match_flag_type m_flags; // format flags to use |
| output_state m_state; // what to do with the next character |
| output_state m_restore_state; // what state to restore to. |
| bool m_have_conditional; // we are parsing a conditional |
| private: |
| basic_regex_formatter(const basic_regex_formatter&); |
| basic_regex_formatter& operator=(const basic_regex_formatter&); |
| }; |
| |
| template <class OutputIterator, class Results, class traits> |
| OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f) |
| { |
| m_position = p1; |
| m_end = p2; |
| m_flags = f; |
| format_all(); |
| return m_out; |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::format_all() |
| { |
| // over and over: |
| while(m_position != m_end) |
| { |
| switch(*m_position) |
| { |
| case '&': |
| if(m_flags & ::boost::regex_constants::format_sed) |
| { |
| ++m_position; |
| put(m_results[0]); |
| break; |
| } |
| put(*m_position++); |
| break; |
| case '\\': |
| format_escape(); |
| break; |
| case '(': |
| if(m_flags & boost::regex_constants::format_all) |
| { |
| ++m_position; |
| bool have_conditional = m_have_conditional; |
| m_have_conditional = false; |
| format_until_scope_end(); |
| m_have_conditional = have_conditional; |
| if(m_position == m_end) |
| return; |
| BOOST_ASSERT(*m_position == static_cast<char_type>(')')); |
| ++m_position; // skip the closing ')' |
| break; |
| } |
| put(*m_position); |
| ++m_position; |
| break; |
| case ')': |
| if(m_flags & boost::regex_constants::format_all) |
| { |
| return; |
| } |
| put(*m_position); |
| ++m_position; |
| break; |
| case ':': |
| if((m_flags & boost::regex_constants::format_all) && m_have_conditional) |
| { |
| return; |
| } |
| put(*m_position); |
| ++m_position; |
| break; |
| case '?': |
| if(m_flags & boost::regex_constants::format_all) |
| { |
| ++m_position; |
| format_conditional(); |
| break; |
| } |
| put(*m_position); |
| ++m_position; |
| break; |
| case '$': |
| if((m_flags & format_sed) == 0) |
| { |
| format_perl(); |
| break; |
| } |
| // fall through, not a special character: |
| default: |
| put(*m_position); |
| ++m_position; |
| break; |
| } |
| } |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() |
| { |
| // |
| // On entry *m_position points to a '$' character |
| // output the information that goes with it: |
| // |
| BOOST_ASSERT(*m_position == '$'); |
| // |
| // see if this is a trailing '$': |
| // |
| if(++m_position == m_end) |
| { |
| --m_position; |
| put(*m_position); |
| ++m_position; |
| return; |
| } |
| // |
| // OK find out what kind it is: |
| // |
| bool have_brace = false; |
| const char_type* save_position = m_position; |
| switch(*m_position) |
| { |
| case '&': |
| ++m_position; |
| put(this->m_results[0]); |
| break; |
| case '`': |
| ++m_position; |
| put(this->m_results.prefix()); |
| break; |
| case '\'': |
| ++m_position; |
| put(this->m_results.suffix()); |
| break; |
| case '$': |
| put(*m_position++); |
| break; |
| case '+': |
| if((++m_position != m_end) && (*m_position == '{')) |
| { |
| const char_type* base = ++m_position; |
| while((m_position != m_end) && (*m_position != '}')) ++m_position; |
| if(m_position != m_end) |
| { |
| // Named sub-expression: |
| put(this->m_results.named_subexpression(base, m_position)); |
| ++m_position; |
| break; |
| } |
| else |
| { |
| m_position = --base; |
| } |
| } |
| put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); |
| break; |
| case '{': |
| have_brace = true; |
| ++m_position; |
| // fall through.... |
| default: |
| // see if we have a number: |
| { |
| std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
| //len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
| int v = m_traits.toi(m_position, m_position + len, 10); |
| if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) |
| { |
| // Look for a Perl-5.10 verb: |
| if(!handle_perl_verb(have_brace)) |
| { |
| // leave the $ as is, and carry on: |
| m_position = --save_position; |
| put(*m_position); |
| ++m_position; |
| } |
| break; |
| } |
| // otherwise output sub v: |
| put(this->m_results[v]); |
| if(have_brace) |
| ++m_position; |
| } |
| } |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| bool basic_regex_formatter<OutputIterator, Results, traits>::handle_perl_verb(bool have_brace) |
| { |
| // |
| // We may have a capitalised string containing a Perl action: |
| // |
| static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; |
| static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; |
| static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; |
| static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; |
| static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; |
| static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; |
| |
| if(have_brace && (*m_position == '^')) |
| ++m_position; |
| |
| int max_len = m_end - m_position; |
| |
| if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) |
| { |
| m_position += 5; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 5; |
| return false; |
| } |
| } |
| put(this->m_results[0]); |
| return true; |
| } |
| if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) |
| { |
| m_position += 8; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 8; |
| return false; |
| } |
| } |
| put(this->m_results.prefix()); |
| return true; |
| } |
| if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) |
| { |
| m_position += 9; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 9; |
| return false; |
| } |
| } |
| put(this->m_results.suffix()); |
| return true; |
| } |
| if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) |
| { |
| m_position += 16; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 16; |
| return false; |
| } |
| } |
| put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); |
| return true; |
| } |
| if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) |
| { |
| m_position += 20; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 20; |
| return false; |
| } |
| } |
| put(this->m_results.get_last_closed_paren()); |
| return true; |
| } |
| if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) |
| { |
| m_position += 2; |
| if(have_brace) |
| { |
| if(*m_position == '}') |
| ++m_position; |
| else |
| { |
| m_position -= 2; |
| return false; |
| } |
| } |
| put(this->m_results.get_last_closed_paren()); |
| return true; |
| } |
| return false; |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() |
| { |
| // skip the escape and check for trailing escape: |
| if(++m_position == m_end) |
| { |
| put(static_cast<char_type>('\\')); |
| return; |
| } |
| // now switch on the escape type: |
| switch(*m_position) |
| { |
| case 'a': |
| put(static_cast<char_type>('\a')); |
| ++m_position; |
| break; |
| case 'f': |
| put(static_cast<char_type>('\f')); |
| ++m_position; |
| break; |
| case 'n': |
| put(static_cast<char_type>('\n')); |
| ++m_position; |
| break; |
| case 'r': |
| put(static_cast<char_type>('\r')); |
| ++m_position; |
| break; |
| case 't': |
| put(static_cast<char_type>('\t')); |
| ++m_position; |
| break; |
| case 'v': |
| put(static_cast<char_type>('\v')); |
| ++m_position; |
| break; |
| case 'x': |
| if(++m_position == m_end) |
| { |
| put(static_cast<char_type>('x')); |
| return; |
| } |
| // maybe have \x{ddd} |
| if(*m_position == static_cast<char_type>('{')) |
| { |
| ++m_position; |
| int val = m_traits.toi(m_position, m_end, 16); |
| if(val < 0) |
| { |
| // invalid value treat everything as literals: |
| put(static_cast<char_type>('x')); |
| put(static_cast<char_type>('{')); |
| return; |
| } |
| if(*m_position != static_cast<char_type>('}')) |
| { |
| while(*m_position != static_cast<char_type>('\\')) |
| --m_position; |
| ++m_position; |
| put(*m_position++); |
| return; |
| } |
| ++m_position; |
| put(static_cast<char_type>(val)); |
| return; |
| } |
| else |
| { |
| std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
| len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
| int val = m_traits.toi(m_position, m_position + len, 16); |
| if(val < 0) |
| { |
| --m_position; |
| put(*m_position++); |
| return; |
| } |
| put(static_cast<char_type>(val)); |
| } |
| break; |
| case 'c': |
| if(++m_position == m_end) |
| { |
| --m_position; |
| put(*m_position++); |
| return; |
| } |
| put(static_cast<char_type>(*m_position++ % 32)); |
| break; |
| case 'e': |
| put(static_cast<char_type>(27)); |
| ++m_position; |
| break; |
| default: |
| // see if we have a perl specific escape: |
| if((m_flags & boost::regex_constants::format_sed) == 0) |
| { |
| bool breakout = false; |
| switch(*m_position) |
| { |
| case 'l': |
| ++m_position; |
| m_restore_state = m_state; |
| m_state = output_next_lower; |
| breakout = true; |
| break; |
| case 'L': |
| ++m_position; |
| m_state = output_lower; |
| breakout = true; |
| break; |
| case 'u': |
| ++m_position; |
| m_restore_state = m_state; |
| m_state = output_next_upper; |
| breakout = true; |
| break; |
| case 'U': |
| ++m_position; |
| m_state = output_upper; |
| breakout = true; |
| break; |
| case 'E': |
| ++m_position; |
| m_state = output_copy; |
| breakout = true; |
| break; |
| } |
| if(breakout) |
| break; |
| } |
| // see if we have a \n sed style backreference: |
| int v = m_traits.toi(m_position, m_position+1, 10); |
| if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) |
| { |
| put(m_results[v]); |
| break; |
| } |
| else if(v == 0) |
| { |
| // octal ecape sequence: |
| --m_position; |
| std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
| len = (std::min)(static_cast<std::ptrdiff_t>(4), len); |
| v = m_traits.toi(m_position, m_position + len, 8); |
| BOOST_ASSERT(v >= 0); |
| put(static_cast<char_type>(v)); |
| break; |
| } |
| // Otherwise output the character "as is": |
| put(*m_position++); |
| break; |
| } |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional() |
| { |
| if(m_position == m_end) |
| { |
| // oops trailing '?': |
| put(static_cast<char_type>('?')); |
| return; |
| } |
| int v; |
| if(*m_position == '{') |
| { |
| const char_type* base = m_position; |
| ++m_position; |
| v = m_traits.toi(m_position, m_end, 10); |
| if(v < 0) |
| { |
| // Try a named subexpression: |
| while((m_position != m_end) && (*m_position != '}')) |
| ++m_position; |
| v = m_results.named_subexpression_index(base + 1, m_position); |
| } |
| if((v < 0) || (*m_position != '}')) |
| { |
| m_position = base; |
| // oops trailing '?': |
| put(static_cast<char_type>('?')); |
| return; |
| } |
| // Skip trailing '}': |
| ++m_position; |
| } |
| else |
| { |
| std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); |
| len = (std::min)(static_cast<std::ptrdiff_t>(2), len); |
| v = m_traits.toi(m_position, m_position + len, 10); |
| } |
| if(v < 0) |
| { |
| // oops not a number: |
| put(static_cast<char_type>('?')); |
| return; |
| } |
| |
| // output varies depending upon whether sub-expression v matched or not: |
| if(m_results[v].matched) |
| { |
| m_have_conditional = true; |
| format_all(); |
| m_have_conditional = false; |
| if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) |
| { |
| // skip the ':': |
| ++m_position; |
| // save output state, then turn it off: |
| output_state saved_state = m_state; |
| m_state = output_none; |
| // format the rest of this scope: |
| format_until_scope_end(); |
| // restore output state: |
| m_state = saved_state; |
| } |
| } |
| else |
| { |
| // save output state, then turn it off: |
| output_state saved_state = m_state; |
| m_state = output_none; |
| // format until ':' or ')': |
| m_have_conditional = true; |
| format_all(); |
| m_have_conditional = false; |
| // restore state: |
| m_state = saved_state; |
| if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) |
| { |
| // skip the ':': |
| ++m_position; |
| // format the rest of this scope: |
| format_until_scope_end(); |
| } |
| } |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end() |
| { |
| do |
| { |
| format_all(); |
| if((m_position == m_end) || (*m_position == static_cast<char_type>(')'))) |
| return; |
| put(*m_position++); |
| }while(m_position != m_end); |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c) |
| { |
| // write a single character to output |
| // according to which case translation mode we are in: |
| switch(this->m_state) |
| { |
| case output_none: |
| return; |
| case output_next_lower: |
| c = m_traits.tolower(c); |
| this->m_state = m_restore_state; |
| break; |
| case output_next_upper: |
| c = m_traits.toupper(c); |
| this->m_state = m_restore_state; |
| break; |
| case output_lower: |
| c = m_traits.tolower(c); |
| break; |
| case output_upper: |
| c = m_traits.toupper(c); |
| break; |
| default: |
| break; |
| } |
| *m_out = c; |
| ++m_out; |
| } |
| |
| template <class OutputIterator, class Results, class traits> |
| void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub) |
| { |
| typedef typename sub_match_type::iterator iterator_type; |
| iterator_type i = sub.first; |
| while(i != sub.second) |
| { |
| put(*i); |
| ++i; |
| } |
| } |
| |
//
// string_out_iterator:
// an output iterator that appends each value assigned through it to the
// string it was constructed from. The string is held by pointer, so it
// must outlive the iterator. Increment and dereference do nothing other
// than return the iterator itself.
//
// The iterator typedefs are always declared directly, rather than being
// inherited from std::iterator, which is deprecated as of C++17.
//
template <class S>
class string_out_iterator
{
   S* out;
public:
   typedef std::ptrdiff_t difference_type;
   typedef typename S::value_type value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;

   string_out_iterator(S& s) : out(&s) {}
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
   string_out_iterator& operator*() { return *this; }
   // append one character to the target string:
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
| |
| template <class OutputIterator, class Iterator, class Alloc, class charT, class traits> |
| OutputIterator regex_format_imp(OutputIterator out, |
| const match_results<Iterator, Alloc>& m, |
| const charT* p1, const charT* p2, |
| match_flag_type flags, |
| const traits& t |
| ) |
| { |
| if(flags & regex_constants::format_literal) |
| { |
| return re_detail::copy(p1, p2, out); |
| } |
| |
| re_detail::basic_regex_formatter< |
| OutputIterator, |
| match_results<Iterator, Alloc>, |
| traits > f(out, m, t); |
| return f.format(p1, p2, flags); |
| } |
| |
| |
| } // namespace re_detail |
| |
| template <class OutputIterator, class Iterator, class charT> |
| OutputIterator regex_format(OutputIterator out, |
| const match_results<Iterator>& m, |
| const charT* fmt, |
| match_flag_type flags = format_all |
| ) |
| { |
| re_detail::trivial_format_traits<charT> traits; |
| return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits); |
| } |
| |
| template <class OutputIterator, class Iterator, class charT> |
| OutputIterator regex_format(OutputIterator out, |
| const match_results<Iterator>& m, |
| const std::basic_string<charT>& fmt, |
| match_flag_type flags = format_all |
| ) |
| { |
| re_detail::trivial_format_traits<charT> traits; |
| return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); |
| } |
| |
| template <class Iterator, class charT> |
| std::basic_string<charT> regex_format(const match_results<Iterator>& m, |
| const charT* fmt, |
| match_flag_type flags = format_all) |
| { |
| std::basic_string<charT> result; |
| re_detail::string_out_iterator<std::basic_string<charT> > i(result); |
| re_detail::trivial_format_traits<charT> traits; |
| re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits); |
| return result; |
| } |
| |
| template <class Iterator, class charT> |
| std::basic_string<charT> regex_format(const match_results<Iterator>& m, |
| const std::basic_string<charT>& fmt, |
| match_flag_type flags = format_all) |
| { |
| std::basic_string<charT> result; |
| re_detail::string_out_iterator<std::basic_string<charT> > i(result); |
| re_detail::trivial_format_traits<charT> traits; |
| re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); |
| return result; |
| } |
| |
| #ifdef BOOST_MSVC |
| #pragma warning(push) |
| #pragma warning(disable: 4103) |
| #endif |
| #ifdef BOOST_HAS_ABI_HEADERS |
| # include BOOST_ABI_SUFFIX |
| #endif |
| #ifdef BOOST_MSVC |
| #pragma warning(pop) |
| #endif |
| |
| } // namespace boost |
| |
| #endif // BOOST_REGEX_FORMAT_HPP |
| |
| |
| |
| |
| |
| |