blob: 09b0467b4022ffe65434dea4a8f07c6080dcc0c8 [file] [log] [blame]
/*
*
* Copyright (c) 1998-2004
* John Maddock
*
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org/ for most recent version.
* FILE basic_regex.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: Declares template class basic_regex.
*/
#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP
#define BOOST_REGEX_V4_BASIC_REGEX_HPP
#include <boost/type_traits/is_same.hpp>
#include <boost/functional/hash.hpp>
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
namespace boost{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4251 4231 4660 4800)
#endif
namespace re_detail{
//
// forward declaration, we will need this one later:
//
template <class charT, class traits>
class basic_regex_parser;
template <class I>
void bubble_down_one(I first, I last)
{
if(first != last)
{
I next = last - 1;
while((next != first) && !(*(next-1) < *next))
{
(next-1)->swap(*next);
--next;
}
}
}
//
// Class named_subexpressions
// Contains information about named subexpressions within the regex.
//
template <class charT>
class named_subexpressions_base
{
public:
virtual int get_id(const charT* i, const charT* j)const = 0;
virtual int get_id(std::size_t hash)const = 0;
#ifdef __GNUC__
// warning supression:
virtual ~named_subexpressions_base(){}
#endif
};
template <class Iterator>
inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j)
{
std::size_t r = boost::hash_range(i, j);
r %= ((std::numeric_limits<int>::max)() - 10001);
r += 10000;
return r;
}
template <class charT>
class named_subexpressions : public named_subexpressions_base<charT>
{
struct name
{
name(const charT* i, const charT* j, int idx)
: /*n(i, j), */ index(idx)
{
hash = hash_value_from_capture_name(i, j);
}
name(std::size_t h, int idx)
: index(idx), hash(h)
{
}
//std::vector<charT> n;
int index;
std::size_t hash;
bool operator < (const name& other)const
{
return hash < other.hash; //std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
}
bool operator == (const name& other)const
{
return hash == other.hash; //n == other.n;
}
void swap(name& other)
{
//n.swap(other.n);
std::swap(index, other.index);
std::swap(hash, other.hash);
}
};
public:
named_subexpressions(){}
void set_name(const charT* i, const charT* j, int index)
{
m_sub_names.push_back(name(i, j, index));
bubble_down_one(m_sub_names.begin(), m_sub_names.end());
}
int get_id(const charT* i, const charT* j)const
{
name t(i, j, 0);
typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
if((pos != m_sub_names.end()) && (*pos == t))
{
return pos->index;
}
return -1;
}
int get_id(std::size_t h)const
{
name t(h, 0);
typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
if((pos != m_sub_names.end()) && (*pos == t))
{
return pos->index;
}
return -1;
}
private:
std::vector<name> m_sub_names;
};
template <class charT, class Other>
class named_subexpressions_converter : public named_subexpressions_base<charT>
{
boost::shared_ptr<named_subexpressions<Other> > m_converter;
public:
named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s)
: m_converter(s) {}
int get_id(const charT* i, const charT* j)const
{
if(i == j)
return -1;
std::vector<Other> v;
while(i != j)
{
v.push_back(*i);
++i;
}
return m_converter->get_id(&v[0], &v[0] + v.size());
}
int get_id(std::size_t h)const
{
return m_converter->get_id(h);
}
};
template <class To>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
boost::shared_ptr<named_subexpressions<To> > s,
boost::integral_constant<bool,true> const&)
{
return s;
}
template <class To, class From>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
boost::shared_ptr<named_subexpressions<From> > s,
boost::integral_constant<bool,false> const&)
{
return boost::shared_ptr<named_subexpressions_converter<To, From> >(new named_subexpressions_converter<To, From>(s));
}
template <class To, class From>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs(
boost::shared_ptr<named_subexpressions<From> > s)
{
typedef typename boost::is_same<To, From>::type tag_type;
return convert_to_named_subs_imp<To>(s, tag_type());
}
//
// class regex_data:
// represents the data we wish to expose to the matching algorithms.
//
template <class charT, class traits>
struct regex_data : public named_subexpressions<charT>
{
typedef regex_constants::syntax_option_type flag_type;
typedef std::size_t size_type;
regex_data(const ::boost::shared_ptr<
::boost::regex_traits_wrapper<traits> >& t)
: m_ptraits(t), m_expression(0), m_expression_len(0) {}
regex_data()
: m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0) {}
::boost::shared_ptr<
::boost::regex_traits_wrapper<traits>
> m_ptraits; // traits class instance
flag_type m_flags; // flags with which we were compiled
int m_status; // error code (0 implies OK).
const charT* m_expression; // the original expression
std::ptrdiff_t m_expression_len; // the length of the original expression
size_type m_mark_count; // the number of marked sub-expressions
re_detail::re_syntax_base* m_first_state; // the first state of the machine
unsigned m_restart_type; // search optimisation type
unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
unsigned int m_can_be_null; // whether we can match a null string
re_detail::raw_storage m_data; // the buffer in which our states are constructed
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
std::vector<
std::pair<
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
bool m_has_recursions; // whether we have recursive expressions;
};
//
// class basic_regex_implementation
// pimpl implementation class for basic_regex.
//
template <class charT, class traits>
class basic_regex_implementation
: public regex_data<charT, traits>
{
public:
typedef regex_constants::syntax_option_type flag_type;
typedef std::ptrdiff_t difference_type;
typedef std::size_t size_type;
typedef typename traits::locale_type locale_type;
typedef const charT* const_iterator;
basic_regex_implementation(){}
basic_regex_implementation(const ::boost::shared_ptr<
::boost::regex_traits_wrapper<traits> >& t)
: regex_data<charT, traits>(t) {}
void assign(const charT* arg_first,
const charT* arg_last,
flag_type f)
{
regex_data<charT, traits>* pdat = this;
basic_regex_parser<charT, traits> parser(pdat);
parser.parse(arg_first, arg_last, f);
}
locale_type BOOST_REGEX_CALL imbue(locale_type l)
{
return this->m_ptraits->imbue(l);
}
locale_type BOOST_REGEX_CALL getloc()const
{
return this->m_ptraits->getloc();
}
std::basic_string<charT> BOOST_REGEX_CALL str()const
{
std::basic_string<charT> result;
if(this->m_status == 0)
result = std::basic_string<charT>(this->m_expression, this->m_expression_len);
return result;
}
const_iterator BOOST_REGEX_CALL expression()const
{
return this->m_expression;
}
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
if(n == 0)
throw std::out_of_range("0 is not a valid subexpression index.");
const std::pair<std::size_t, std::size_t>& pi = this->m_subs.at(n - 1);
std::pair<const_iterator, const_iterator> p(expression() + pi.first, expression() + pi.second);
return p;
}
//
// begin, end:
const_iterator BOOST_REGEX_CALL begin()const
{
return (!this->m_status ? 0 : this->m_expression);
}
const_iterator BOOST_REGEX_CALL end()const
{
return (!this->m_status ? 0 : this->m_expression + this->m_expression_len);
}
flag_type BOOST_REGEX_CALL flags()const
{
return this->m_flags;
}
size_type BOOST_REGEX_CALL size()const
{
return this->m_expression_len;
}
int BOOST_REGEX_CALL status()const
{
return this->m_status;
}
size_type BOOST_REGEX_CALL mark_count()const
{
return this->m_mark_count;
}
const re_detail::re_syntax_base* get_first_state()const
{
return this->m_first_state;
}
unsigned get_restart_type()const
{
return this->m_restart_type;
}
const unsigned char* get_map()const
{
return this->m_startmap;
}
const ::boost::regex_traits_wrapper<traits>& get_traits()const
{
return *(this->m_ptraits);
}
bool can_be_null()const
{
return this->m_can_be_null;
}
const regex_data<charT, traits>& get_data()const
{
basic_regex_implementation<charT, traits> const* p = this;
return *static_cast<const regex_data<charT, traits>*>(p);
}
};
} // namespace re_detail
//
// class basic_regex:
// represents the compiled
// regular expression:
//
#ifdef BOOST_REGEX_NO_FWD
template <class charT, class traits = regex_traits<charT> >
#else
template <class charT, class traits >
#endif
class basic_regex : public regbase
{
public:
// typedefs:
typedef std::size_t traits_size_type;
typedef typename traits::string_type traits_string_type;
typedef charT char_type;
typedef traits traits_type;
typedef charT value_type;
typedef charT& reference;
typedef const charT& const_reference;
typedef const charT* const_iterator;
typedef const_iterator iterator;
typedef std::ptrdiff_t difference_type;
typedef std::size_t size_type;
typedef regex_constants::syntax_option_type flag_type;
// locale_type
// placeholder for actual locale type used by the
// traits class to localise *this.
typedef typename traits::locale_type locale_type;
public:
explicit basic_regex(){}
explicit basic_regex(const charT* p, flag_type f = regex_constants::normal)
{
assign(p, f);
}
basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
{
assign(p1, p2, f);
}
basic_regex(const charT* p, size_type len, flag_type f)
{
assign(p, len, f);
}
basic_regex(const basic_regex& that)
: m_pimpl(that.m_pimpl) {}
~basic_regex(){}
basic_regex& BOOST_REGEX_CALL operator=(const basic_regex& that)
{
return assign(that);
}
basic_regex& BOOST_REGEX_CALL operator=(const charT* ptr)
{
return assign(ptr);
}
//
// assign:
basic_regex& assign(const basic_regex& that)
{
m_pimpl = that.m_pimpl;
return *this;
}
basic_regex& assign(const charT* p, flag_type f = regex_constants::normal)
{
return assign(p, p + traits::length(p), f);
}
basic_regex& assign(const charT* p, size_type len, flag_type f)
{
return assign(p, p + len, f);
}
private:
basic_regex& do_assign(const charT* p1,
const charT* p2,
flag_type f);
public:
basic_regex& assign(const charT* p1,
const charT* p2,
flag_type f = regex_constants::normal)
{
return do_assign(p1, p2, f);
}
#if !defined(BOOST_NO_MEMBER_TEMPLATES)
template <class ST, class SA>
unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
{
return set_expression(p.data(), p.data() + p.size(), f);
}
template <class ST, class SA>
explicit basic_regex(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
{
assign(p, f);
}
template <class InputIterator>
basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
{
typedef typename traits::string_type seq_type;
seq_type a(arg_first, arg_last);
if(a.size())
assign(&*a.begin(), &*a.begin() + a.size(), f);
else
assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
}
template <class ST, class SA>
basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
{
return assign(p.data(), p.data() + p.size(), regex_constants::normal);
}
template <class string_traits, class A>
basic_regex& BOOST_REGEX_CALL assign(
const std::basic_string<charT, string_traits, A>& s,
flag_type f = regex_constants::normal)
{
return assign(s.data(), s.data() + s.size(), f);
}
template <class InputIterator>
basic_regex& BOOST_REGEX_CALL assign(InputIterator arg_first,
InputIterator arg_last,
flag_type f = regex_constants::normal)
{
typedef typename traits::string_type seq_type;
seq_type a(arg_first, arg_last);
if(a.size())
{
const charT* p1 = &*a.begin();
const charT* p2 = &*a.begin() + a.size();
return assign(p1, p2, f);
}
return assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
}
#else
unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
{
return set_expression(p.data(), p.data() + p.size(), f);
}
basic_regex(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
{
assign(p, f);
}
basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
{
return assign(p.data(), p.data() + p.size(), regex_constants::normal);
}
basic_regex& BOOST_REGEX_CALL assign(
const std::basic_string<charT>& s,
flag_type f = regex_constants::normal)
{
return assign(s.data(), s.data() + s.size(), f);
}
#endif
//
// locale:
locale_type BOOST_REGEX_CALL imbue(locale_type l);
locale_type BOOST_REGEX_CALL getloc()const
{
return m_pimpl.get() ? m_pimpl->getloc() : locale_type();
}
//
// getflags:
// retained for backwards compatibility only, "flags"
// is now the preferred name:
flag_type BOOST_REGEX_CALL getflags()const
{
return flags();
}
flag_type BOOST_REGEX_CALL flags()const
{
return m_pimpl.get() ? m_pimpl->flags() : 0;
}
//
// str:
std::basic_string<charT> BOOST_REGEX_CALL str()const
{
return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
}
//
// begin, end, subexpression:
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
if(!m_pimpl.get())
throw std::logic_error("Can't access subexpressions in an invalid regex.");
return m_pimpl->subexpression(n);
}
const_iterator BOOST_REGEX_CALL begin()const
{
return (m_pimpl.get() ? m_pimpl->begin() : 0);
}
const_iterator BOOST_REGEX_CALL end()const
{
return (m_pimpl.get() ? m_pimpl->end() : 0);
}
//
// swap:
void BOOST_REGEX_CALL swap(basic_regex& that)throw()
{
m_pimpl.swap(that.m_pimpl);
}
//
// size:
size_type BOOST_REGEX_CALL size()const
{
return (m_pimpl.get() ? m_pimpl->size() : 0);
}
//
// max_size:
size_type BOOST_REGEX_CALL max_size()const
{
return UINT_MAX;
}
//
// empty:
bool BOOST_REGEX_CALL empty()const
{
return (m_pimpl.get() ? 0 != m_pimpl->status() : true);
}
size_type BOOST_REGEX_CALL mark_count()const
{
return (m_pimpl.get() ? m_pimpl->mark_count() : 0);
}
int status()const
{
return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty);
}
int BOOST_REGEX_CALL compare(const basic_regex& that) const
{
if(m_pimpl.get() == that.m_pimpl.get())
return 0;
if(!m_pimpl.get())
return -1;
if(!that.m_pimpl.get())
return 1;
if(status() != that.status())
return status() - that.status();
if(flags() != that.flags())
return flags() - that.flags();
return str().compare(that.str());
}
bool BOOST_REGEX_CALL operator==(const basic_regex& e)const
{
return compare(e) == 0;
}
bool BOOST_REGEX_CALL operator != (const basic_regex& e)const
{
return compare(e) != 0;
}
bool BOOST_REGEX_CALL operator<(const basic_regex& e)const
{
return compare(e) < 0;
}
bool BOOST_REGEX_CALL operator>(const basic_regex& e)const
{
return compare(e) > 0;
}
bool BOOST_REGEX_CALL operator<=(const basic_regex& e)const
{
return compare(e) <= 0;
}
bool BOOST_REGEX_CALL operator>=(const basic_regex& e)const
{
return compare(e) >= 0;
}
//
// The following are deprecated as public interfaces
// but are available for compatibility with earlier versions.
const charT* BOOST_REGEX_CALL expression()const
{
return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0);
}
unsigned int BOOST_REGEX_CALL set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
{
assign(p1, p2, f | regex_constants::no_except);
return status();
}
unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal)
{
assign(p, f | regex_constants::no_except);
return status();
}
unsigned int BOOST_REGEX_CALL error_code()const
{
return status();
}
//
// private access methods:
//
const re_detail::re_syntax_base* get_first_state()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_first_state();
}
unsigned get_restart_type()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_restart_type();
}
const unsigned char* get_map()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_map();
}
const ::boost::regex_traits_wrapper<traits>& get_traits()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_traits();
}
bool can_be_null()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->can_be_null();
}
const re_detail::regex_data<charT, traits>& get_data()const
{
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_data();
}
boost::shared_ptr<re_detail::named_subexpressions<charT> > get_named_subs()const
{
return m_pimpl;
}
private:
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;
};
//
// out of line members;
// these are the only members that mutate the basic_regex object,
// and are designed to provide the strong exception guarentee
// (in the event of a throw, the state of the object remains unchanged).
//
template <class charT, class traits>
basic_regex<charT, traits>& basic_regex<charT, traits>::do_assign(const charT* p1,
const charT* p2,
flag_type f)
{
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > temp;
if(!m_pimpl.get())
{
temp = shared_ptr<re_detail::basic_regex_implementation<charT, traits> >(new re_detail::basic_regex_implementation<charT, traits>());
}
else
{
temp = shared_ptr<re_detail::basic_regex_implementation<charT, traits> >(new re_detail::basic_regex_implementation<charT, traits>(m_pimpl->m_ptraits));
}
temp->assign(p1, p2, f);
temp.swap(m_pimpl);
return *this;
}
template <class charT, class traits>
typename basic_regex<charT, traits>::locale_type BOOST_REGEX_CALL basic_regex<charT, traits>::imbue(locale_type l)
{
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > temp(new re_detail::basic_regex_implementation<charT, traits>());
locale_type result = temp->imbue(l);
temp.swap(m_pimpl);
return result;
}
//
// non-members:
//
template <class charT, class traits>
void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2)
{
e1.swap(e2);
}
#ifndef BOOST_NO_STD_LOCALE
template <class charT, class traits, class traits2>
std::basic_ostream<charT, traits>&
operator << (std::basic_ostream<charT, traits>& os,
const basic_regex<charT, traits2>& e)
{
return (os << e.str());
}
#else
template <class traits>
std::ostream& operator << (std::ostream& os, const basic_regex<char, traits>& e)
{
return (os << e.str());
}
#endif
//
// class reg_expression:
// this is provided for backwards compatibility only,
// it is deprecated, no not use!
//
#ifdef BOOST_REGEX_NO_FWD
template <class charT, class traits = regex_traits<charT> >
#else
template <class charT, class traits >
#endif
class reg_expression : public basic_regex<charT, traits>
{
public:
typedef typename basic_regex<charT, traits>::flag_type flag_type;
typedef typename basic_regex<charT, traits>::size_type size_type;
explicit reg_expression(){}
explicit reg_expression(const charT* p, flag_type f = regex_constants::normal)
: basic_regex<charT, traits>(p, f){}
reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
: basic_regex<charT, traits>(p1, p2, f){}
reg_expression(const charT* p, size_type len, flag_type f)
: basic_regex<charT, traits>(p, len, f){}
reg_expression(const reg_expression& that)
: basic_regex<charT, traits>(that) {}
~reg_expression(){}
reg_expression& BOOST_REGEX_CALL operator=(const reg_expression& that)
{
return this->assign(that);
}
#if !defined(BOOST_NO_MEMBER_TEMPLATES)
template <class ST, class SA>
explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
: basic_regex<charT, traits>(p, f)
{
}
template <class InputIterator>
reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
: basic_regex<charT, traits>(arg_first, arg_last, f)
{
}
template <class ST, class SA>
reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
{
this->assign(p);
return *this;
}
#else
explicit reg_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
: basic_regex<charT, traits>(p, f)
{
}
reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
{
this->assign(p);
return *this;
}
#endif
};
#ifdef BOOST_MSVC
#pragma warning (pop)
#endif
} // namespace boost
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
#endif