basic_regex.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file basic_regex.hpp
  3. /// Contains the definition of the basic_regex\<\> class template and its
  4. /// associated helper functions.
  5. //
  6. // Copyright 2008 Eric Niebler. Distributed under the Boost
  7. // Software License, Version 1.0. (See accompanying file
  8. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef BOOST_XPRESSIVE_BASIC_REGEX_HPP_EAN_10_04_2005
  10. #define BOOST_XPRESSIVE_BASIC_REGEX_HPP_EAN_10_04_2005
  11. // MS compatible compilers support #pragma once
  12. #if defined(_MSC_VER)
  13. # pragma once
  14. #endif
  15. #include <boost/config.hpp>
  16. #include <boost/mpl/bool.hpp>
  17. #include <boost/xpressive/xpressive_fwd.hpp>
  18. #include <boost/xpressive/regex_constants.hpp>
  19. #include <boost/xpressive/detail/detail_fwd.hpp>
  20. #include <boost/xpressive/detail/core/regex_impl.hpp>
  21. #include <boost/xpressive/detail/core/regex_domain.hpp>
  22. // Doxygen can't handle proto :-(
  23. #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED
  24. # include <boost/xpressive/detail/static/grammar.hpp>
  25. # include <boost/proto/extends.hpp>
  26. #endif
  27. #if BOOST_XPRESSIVE_HAS_MS_STACK_GUARD
  28. # include <excpt.h> // for _exception_code()
  29. # include <malloc.h> // for _resetstkoflw()
  30. #endif
  31. namespace boost { namespace xpressive
  32. {
  33. namespace detail
  34. {
  35. inline void throw_on_stack_error(bool stack_error)
  36. {
  37. BOOST_XPR_ENSURE_(!stack_error, regex_constants::error_stack, "Regex stack space exhausted");
  38. }
  39. }
  40. ///////////////////////////////////////////////////////////////////////////////
  41. // basic_regex
  42. //
  43. /// \brief Class template basic_regex\<\> is a class for holding a compiled regular expression.
  44. template<typename BidiIter>
  45. struct basic_regex
  46. : proto::extends<
  47. proto::expr<proto::tag::terminal, proto::term<detail::tracking_ptr<detail::regex_impl<BidiIter> > >, 0>
  48. , basic_regex<BidiIter>
  49. , detail::regex_domain
  50. >
  51. {
  52. private:
  53. typedef proto::expr<proto::tag::terminal, proto::term<detail::tracking_ptr<detail::regex_impl<BidiIter> > >, 0> pimpl_type;
  54. typedef proto::extends<pimpl_type, basic_regex<BidiIter>, detail::regex_domain> base_type;
  55. public:
  56. typedef BidiIter iterator_type;
  57. typedef typename iterator_value<BidiIter>::type char_type;
  58. // For compatibility with std::basic_regex
  59. typedef typename iterator_value<BidiIter>::type value_type;
  60. typedef typename detail::string_type<char_type>::type string_type;
  61. typedef regex_constants::syntax_option_type flag_type;
  62. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, ECMAScript = regex_constants::ECMAScript);
  63. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, icase = regex_constants::icase_);
  64. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, nosubs = regex_constants::nosubs);
  65. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, optimize = regex_constants::optimize);
  66. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, collate = regex_constants::collate);
  67. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, single_line = regex_constants::single_line);
  68. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, not_dot_null = regex_constants::not_dot_null);
  69. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, not_dot_newline = regex_constants::not_dot_newline);
  70. BOOST_STATIC_CONSTANT(regex_constants::syntax_option_type, ignore_white_space = regex_constants::ignore_white_space);
  71. /// \post regex_id() == 0
  72. /// \post mark_count() == 0
  73. basic_regex()
  74. : base_type()
  75. {
  76. }
  77. /// \param that The basic_regex object to copy.
  78. /// \post regex_id() == that.regex_id()
  79. /// \post mark_count() == that.mark_count()
  80. basic_regex(basic_regex<BidiIter> const &that)
  81. : base_type(that)
  82. {
  83. }
  84. /// \param that The basic_regex object to copy.
  85. /// \post regex_id() == that.regex_id()
  86. /// \post mark_count() == that.mark_count()
  87. /// \return *this
  88. basic_regex<BidiIter> &operator =(basic_regex<BidiIter> const &that)
  89. {
  90. proto::value(*this) = proto::value(that);
  91. return *this;
  92. }
  93. /// Construct from a static regular expression.
  94. ///
  95. /// \param expr The static regular expression
  96. /// \pre Expr is the type of a static regular expression.
  97. /// \post regex_id() != 0
  98. /// \post mark_count() \>= 0
  99. template<typename Expr>
  100. basic_regex(Expr const &expr)
  101. : base_type()
  102. {
  103. BOOST_XPRESSIVE_CHECK_REGEX(Expr, char_type);
  104. this->compile_(expr, is_valid_regex<Expr, char_type>());
  105. }
  106. /// Construct from a static regular expression.
  107. ///
  108. /// \param expr The static regular expression.
  109. /// \pre Expr is the type of a static regular expression.
  110. /// \post regex_id() != 0
  111. /// \post mark_count() \>= 0
  112. /// \throw std::bad_alloc on out of memory
  113. /// \return *this
  114. template<typename Expr>
  115. basic_regex<BidiIter> &operator =(Expr const &expr)
  116. {
  117. BOOST_XPRESSIVE_CHECK_REGEX(Expr, char_type);
  118. this->compile_(expr, is_valid_regex<Expr, char_type>());
  119. return *this;
  120. }
  121. /// Returns the count of capturing sub-expressions in this regular expression
  122. ///
  123. std::size_t mark_count() const
  124. {
  125. return proto::value(*this) ? proto::value(*this)->mark_count_ : 0;
  126. }
  127. /// Returns a token which uniquely identifies this regular expression.
  128. ///
  129. regex_id_type regex_id() const
  130. {
  131. return proto::value(*this) ? proto::value(*this)->xpr_.get() : 0;
  132. }
  133. /// Swaps the contents of this basic_regex object with another.
  134. ///
  135. /// \param that The other basic_regex object.
  136. /// \attention This is a shallow swap that does not do reference tracking.
  137. /// If you embed a basic_regex object by reference in another
  138. /// regular expression and then swap its contents with another
  139. /// basic_regex object, the change will not be visible to the
  140. /// enclosing regular expression. It is done this way to ensure
  141. /// that swap() cannot throw.
  142. /// \throw nothrow
  143. void swap(basic_regex<BidiIter> &that) // throw()
  144. {
  145. proto::value(*this).swap(proto::value(that));
  146. }
  147. /// Factory method for building a regex object from a range of characters.
  148. /// Equivalent to regex_compiler\< BidiIter \>().compile(begin, end, flags);
  149. ///
  150. /// \param begin The beginning of a range of characters representing the
  151. /// regular expression to compile.
  152. /// \param end The end of a range of characters representing the
  153. /// regular expression to compile.
  154. /// \param flags Optional bitmask that determines how the pat string is
  155. /// interpreted. (See syntax_option_type.)
  156. /// \return A basic_regex object corresponding to the regular expression
  157. /// represented by the character range.
  158. /// \pre [begin,end) is a valid range.
  159. /// \pre The range of characters specified by [begin,end) contains a
  160. /// valid string-based representation of a regular expression.
  161. /// \throw regex_error when the range of characters has invalid regular
  162. /// expression syntax.
  163. template<typename InputIter>
  164. static basic_regex<BidiIter> compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
  165. {
  166. return regex_compiler<BidiIter>().compile(begin, end, flags);
  167. }
  168. /// \overload
  169. ///
  170. template<typename InputRange>
  171. static basic_regex<BidiIter> compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
  172. {
  173. return regex_compiler<BidiIter>().compile(pat, flags);
  174. }
  175. /// \overload
  176. ///
  177. static basic_regex<BidiIter> compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
  178. {
  179. return regex_compiler<BidiIter>().compile(begin, flags);
  180. }
  181. /// \overload
  182. ///
  183. static basic_regex<BidiIter> compile(char_type const *begin, std::size_t len, flag_type flags)
  184. {
  185. return regex_compiler<BidiIter>().compile(begin, len, flags);
  186. }
  187. private:
  188. friend struct detail::core_access<BidiIter>;
  189. // Avoid a common programming mistake. Construction from a string is
  190. // ambiguous. It could mean:
  191. // sregex rx = sregex::compile(str); // compile the string into a regex
  192. // or
  193. // sregex rx = as_xpr(str); // treat the string as a literal
  194. // Since there is no easy way to disambiguate, it is disallowed. You must
  195. // say what you mean.
  196. /// INTERNAL ONLY
  197. basic_regex(char_type const *);
  198. /// INTERNAL ONLY
  199. basic_regex(string_type const &);
  200. /// INTERNAL ONLY
  201. bool match_(detail::match_state<BidiIter> &state) const
  202. {
  203. #if BOOST_XPRESSIVE_HAS_MS_STACK_GUARD
  204. bool success = false, stack_error = false;
  205. __try
  206. {
  207. success = proto::value(*this)->xpr_->match(state);
  208. }
  209. __except(_exception_code() == 0xC00000FDUL)
  210. {
  211. stack_error = true;
  212. _resetstkoflw();
  213. }
  214. detail::throw_on_stack_error(stack_error);
  215. return success;
  216. #else
  217. return proto::value(*this)->xpr_->match(state);
  218. #endif
  219. }
  220. // Compiles valid static regexes into a state machine.
  221. /// INTERNAL ONLY
  222. template<typename Expr>
  223. void compile_(Expr const &expr, mpl::true_)
  224. {
  225. detail::static_compile(expr, proto::value(*this).get());
  226. }
  227. // No-op for invalid static regexes.
  228. /// INTERNAL ONLY
  229. template<typename Expr>
  230. void compile_(Expr const &, mpl::false_)
  231. {
  232. }
  233. };
  234. #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
  235. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::ECMAScript;
  236. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::icase;
  237. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::nosubs;
  238. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::optimize;
  239. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::collate;
  240. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::single_line;
  241. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::not_dot_null;
  242. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::not_dot_newline;
  243. template<typename BidiIter> regex_constants::syntax_option_type const basic_regex<BidiIter>::ignore_white_space;
  244. #endif
  245. ///////////////////////////////////////////////////////////////////////////////
  246. // swap
  247. /// \brief Swaps the contents of two basic_regex objects.
  248. /// \param left The first basic_regex object.
  249. /// \param right The second basic_regex object.
  250. /// \attention This is a shallow swap that does not do reference tracking.
  251. /// If you embed a basic_regex object by reference in another
  252. /// regular expression and then swap its contents with another
  253. /// basic_regex object, the change will not be visible to the
  254. /// enclosing regular expression. It is done this way to ensure
  255. /// that swap() cannot throw.
  256. /// \throw nothrow
  257. template<typename BidiIter>
  258. inline void swap(basic_regex<BidiIter> &left, basic_regex<BidiIter> &right) // throw()
  259. {
  260. left.swap(right);
  261. }
  262. }} // namespace boost::xpressive
  263. #endif // BOOST_XPRESSIVE_BASIC_REGEX_HPP_EAN_10_04_2005