regex_compiler.hpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file regex_compiler.hpp
  3. /// Contains the definition of regex_compiler, a factory for building regex objects
  4. /// from strings.
  5. //
  6. // Copyright 2008 Eric Niebler. Distributed under the Boost
  7. // Software License, Version 1.0. (See accompanying file
  8. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
  10. #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
  11. // MS compatible compilers support #pragma once
  12. #if defined(_MSC_VER)
  13. # pragma once
  14. #endif
  15. #include <map>
  16. #include <boost/config.hpp>
  17. #include <boost/assert.hpp>
  18. #include <boost/next_prior.hpp>
  19. #include <boost/range/begin.hpp>
  20. #include <boost/range/end.hpp>
  21. #include <boost/mpl/assert.hpp>
  22. #include <boost/throw_exception.hpp>
  23. #include <boost/type_traits/is_same.hpp>
  24. #include <boost/type_traits/is_pointer.hpp>
  25. #include <boost/utility/enable_if.hpp>
  26. #include <boost/iterator/iterator_traits.hpp>
  27. #include <boost/xpressive/basic_regex.hpp>
  28. #include <boost/xpressive/detail/dynamic/parser.hpp>
  29. #include <boost/xpressive/detail/dynamic/parse_charset.hpp>
  30. #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
  31. #include <boost/xpressive/detail/dynamic/parser_traits.hpp>
  32. #include <boost/xpressive/detail/core/linker.hpp>
  33. #include <boost/xpressive/detail/core/optimize.hpp>
  34. namespace boost { namespace xpressive
  35. {
  36. ///////////////////////////////////////////////////////////////////////////////
  37. // regex_compiler
  38. //
/// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
///
/// Class template regex_compiler is used to construct a basic_regex object from a string. The string
/// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
/// after which all basic_regex objects created with that regex_compiler object will use that locale.
/// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
/// compile() method to construct a basic_regex object, passing it the string representing the regular
/// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
/// objects compiled from the same string will have different regex_id's.
  48. template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
  49. struct regex_compiler
  50. {
  51. typedef BidiIter iterator_type;
  52. typedef typename iterator_value<BidiIter>::type char_type;
  53. typedef regex_constants::syntax_option_type flag_type;
  54. typedef RegexTraits traits_type;
  55. typedef typename traits_type::string_type string_type;
  56. typedef typename traits_type::locale_type locale_type;
  57. typedef typename traits_type::char_class_type char_class_type;
  58. explicit regex_compiler(RegexTraits const &traits = RegexTraits())
  59. : mark_count_(0)
  60. , hidden_mark_count_(0)
  61. , traits_(traits)
  62. , upper_(0)
  63. , self_()
  64. , rules_()
  65. {
  66. this->upper_ = lookup_classname(this->rxtraits(), "upper");
  67. }
  68. ///////////////////////////////////////////////////////////////////////////
  69. // imbue
  70. /// Specify the locale to be used by a regex_compiler.
  71. ///
  72. /// \param loc The locale that this regex_compiler should use.
  73. /// \return The previous locale.
  74. locale_type imbue(locale_type loc)
  75. {
  76. locale_type oldloc = this->traits_.imbue(loc);
  77. this->upper_ = lookup_classname(this->rxtraits(), "upper");
  78. return oldloc;
  79. }
  80. ///////////////////////////////////////////////////////////////////////////
  81. // getloc
  82. /// Get the locale used by a regex_compiler.
  83. ///
  84. /// \return The locale used by this regex_compiler.
  85. locale_type getloc() const
  86. {
  87. return this->traits_.getloc();
  88. }
  89. ///////////////////////////////////////////////////////////////////////////
  90. // compile
  91. /// Builds a basic_regex object from a range of characters.
  92. ///
  93. /// \param begin The beginning of a range of characters representing the
  94. /// regular expression to compile.
  95. /// \param end The end of a range of characters representing the
  96. /// regular expression to compile.
  97. /// \param flags Optional bitmask that determines how the pat string is
  98. /// interpreted. (See syntax_option_type.)
  99. /// \return A basic_regex object corresponding to the regular expression
  100. /// represented by the character range.
  101. /// \pre InputIter is a model of the InputIterator concept.
  102. /// \pre [begin,end) is a valid range.
  103. /// \pre The range of characters specified by [begin,end) contains a
  104. /// valid string-based representation of a regular expression.
  105. /// \throw regex_error when the range of characters has invalid regular
  106. /// expression syntax.
  107. template<typename InputIter>
  108. basic_regex<BidiIter>
  109. compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
  110. {
  111. typedef typename iterator_category<InputIter>::type category;
  112. return this->compile_(begin, end, flags, category());
  113. }
  114. /// \overload
  115. ///
  116. template<typename InputRange>
  117. typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
  118. compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
  119. {
  120. return this->compile(boost::begin(pat), boost::end(pat), flags);
  121. }
  122. /// \overload
  123. ///
  124. basic_regex<BidiIter>
  125. compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
  126. {
  127. BOOST_ASSERT(0 != begin);
  128. char_type const *end = begin + std::char_traits<char_type>::length(begin);
  129. return this->compile(begin, end, flags);
  130. }
  131. /// \overload
  132. ///
  133. basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
  134. {
  135. BOOST_ASSERT(0 != begin);
  136. char_type const *end = begin + size;
  137. return this->compile(begin, end, flags);
  138. }
  139. ///////////////////////////////////////////////////////////////////////////
  140. // operator[]
  141. /// Return a reference to the named regular expression. If no such named
  142. /// regular expression exists, create a new regular expression and return
  143. /// a reference to it.
  144. ///
  145. /// \param name A std::string containing the name of the regular expression.
  146. /// \pre The string is not empty.
  147. /// \throw bad_alloc on allocation failure.
  148. basic_regex<BidiIter> &operator [](string_type const &name)
  149. {
  150. BOOST_ASSERT(!name.empty());
  151. return this->rules_[name];
  152. }
  153. /// \overload
  154. ///
  155. basic_regex<BidiIter> const &operator [](string_type const &name) const
  156. {
  157. BOOST_ASSERT(!name.empty());
  158. return this->rules_[name];
  159. }
  160. private:
  161. typedef detail::escape_value<char_type, char_class_type> escape_value;
  162. typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
  163. ///////////////////////////////////////////////////////////////////////////
  164. // compile_
  165. /// INTERNAL ONLY
  166. template<typename FwdIter>
  167. basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
  168. {
  169. BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
  170. using namespace regex_constants;
  171. this->reset();
  172. this->traits_.flags(flags);
  173. basic_regex<BidiIter> rextmp, *prex = &rextmp;
  174. FwdIter tmp = begin;
  175. // Check if this regex is a named rule:
  176. string_type name;
  177. if(token_group_begin == this->traits_.get_token(tmp, end) &&
  178. BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
  179. token_rule_assign == this->traits_.get_group_type(tmp, end, name))
  180. {
  181. begin = tmp;
  182. BOOST_XPR_ENSURE_
  183. (
  184. begin != end && token_group_end == this->traits_.get_token(begin, end)
  185. , error_paren
  186. , "mismatched parenthesis"
  187. );
  188. prex = &this->rules_[name];
  189. }
  190. this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
  191. // at the top level, a regex is a sequence of alternates
  192. detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
  193. BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
  194. // terminate the sequence
  195. seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
  196. // bundle the regex information into a regex_impl object
  197. detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
  198. this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
  199. this->self_->mark_count_ = this->mark_count_;
  200. this->self_->hidden_mark_count_ = this->hidden_mark_count_;
  201. // References changed, update dependencies.
  202. this->self_->tracking_update();
  203. this->self_.reset();
  204. return *prex;
  205. }
  206. ///////////////////////////////////////////////////////////////////////////
  207. // compile_
  208. /// INTERNAL ONLY
  209. template<typename InputIter>
  210. basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
  211. {
  212. string_type pat(begin, end);
  213. return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
  214. }
  215. ///////////////////////////////////////////////////////////////////////////
  216. // reset
  217. /// INTERNAL ONLY
  218. void reset()
  219. {
  220. this->mark_count_ = 0;
  221. this->hidden_mark_count_ = 0;
  222. this->traits_.flags(regex_constants::ECMAScript);
  223. }
  224. ///////////////////////////////////////////////////////////////////////////
  225. // regex_traits
  226. /// INTERNAL ONLY
  227. traits_type &rxtraits()
  228. {
  229. return this->traits_.traits();
  230. }
  231. ///////////////////////////////////////////////////////////////////////////
  232. // regex_traits
  233. /// INTERNAL ONLY
  234. traits_type const &rxtraits() const
  235. {
  236. return this->traits_.traits();
  237. }
  238. ///////////////////////////////////////////////////////////////////////////
  239. // parse_alternates
  240. /// INTERNAL ONLY
  241. template<typename FwdIter>
  242. detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
  243. {
  244. using namespace regex_constants;
  245. int count = 0;
  246. FwdIter tmp = begin;
  247. detail::sequence<BidiIter> seq;
  248. do switch(++count)
  249. {
  250. case 1:
  251. seq = this->parse_sequence(tmp, end);
  252. break;
  253. case 2:
  254. seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
  255. BOOST_FALLTHROUGH;
  256. default:
  257. seq |= this->parse_sequence(tmp, end);
  258. }
  259. while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
  260. return seq;
  261. }
  262. ///////////////////////////////////////////////////////////////////////////
  263. // parse_group
  264. /// INTERNAL ONLY
  265. template<typename FwdIter>
  266. detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
  267. {
  268. using namespace regex_constants;
  269. int mark_nbr = 0;
  270. bool keeper = false;
  271. bool lookahead = false;
  272. bool lookbehind = false;
  273. bool negative = false;
  274. string_type name;
  275. detail::sequence<BidiIter> seq, seq_end;
  276. FwdIter tmp = FwdIter();
  277. syntax_option_type old_flags = this->traits_.flags();
  278. switch(this->traits_.get_group_type(begin, end, name))
  279. {
  280. case token_no_mark:
  281. // Don't process empty groups like (?:) or (?i)
  282. // BUGBUG this doesn't handle the degenerate (?:)+ correctly
  283. if(token_group_end == this->traits_.get_token(tmp = begin, end))
  284. {
  285. return this->parse_atom(begin = tmp, end);
  286. }
  287. break;
  288. case token_negative_lookahead:
  289. negative = true;
  290. BOOST_FALLTHROUGH;
  291. case token_positive_lookahead:
  292. lookahead = true;
  293. break;
  294. case token_negative_lookbehind:
  295. negative = true;
  296. BOOST_FALLTHROUGH;
  297. case token_positive_lookbehind:
  298. lookbehind = true;
  299. break;
  300. case token_independent_sub_expression:
  301. keeper = true;
  302. break;
  303. case token_comment:
  304. while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
  305. {
  306. switch(this->traits_.get_token(begin, end))
  307. {
  308. case token_group_end:
  309. return this->parse_atom(begin, end);
  310. case token_escape:
  311. BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
  312. BOOST_FALLTHROUGH;
  313. case token_literal:
  314. ++begin;
  315. break;
  316. default:
  317. break;
  318. }
  319. }
  320. break;
  321. case token_recurse:
  322. BOOST_XPR_ENSURE_
  323. (
  324. begin != end && token_group_end == this->traits_.get_token(begin, end)
  325. , error_paren
  326. , "mismatched parenthesis"
  327. );
  328. return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
  329. case token_rule_assign:
  330. BOOST_THROW_EXCEPTION(
  331. regex_error(error_badrule, "rule assignments must be at the front of the regex")
  332. );
  333. break;
  334. case token_rule_ref:
  335. {
  336. typedef detail::core_access<BidiIter> access;
  337. BOOST_XPR_ENSURE_
  338. (
  339. begin != end && token_group_end == this->traits_.get_token(begin, end)
  340. , error_paren
  341. , "mismatched parenthesis"
  342. );
  343. basic_regex<BidiIter> &rex = this->rules_[name];
  344. shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
  345. this->self_->track_reference(*impl);
  346. return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
  347. }
  348. case token_named_mark:
  349. mark_nbr = static_cast<int>(++this->mark_count_);
  350. for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
  351. {
  352. BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
  353. }
  354. this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
  355. seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
  356. seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
  357. break;
  358. case token_named_mark_ref:
  359. BOOST_XPR_ENSURE_
  360. (
  361. begin != end && token_group_end == this->traits_.get_token(begin, end)
  362. , error_paren
  363. , "mismatched parenthesis"
  364. );
  365. for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
  366. {
  367. if(this->self_->named_marks_[i].name_ == name)
  368. {
  369. mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
  370. return detail::make_backref_xpression<BidiIter>
  371. (
  372. mark_nbr, this->traits_.flags(), this->rxtraits()
  373. );
  374. }
  375. }
  376. BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
  377. break;
  378. default:
  379. mark_nbr = static_cast<int>(++this->mark_count_);
  380. seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
  381. seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
  382. break;
  383. }
  384. // alternates
  385. seq += this->parse_alternates(begin, end);
  386. seq += seq_end;
  387. BOOST_XPR_ENSURE_
  388. (
  389. begin != end && token_group_end == this->traits_.get_token(begin, end)
  390. , error_paren
  391. , "mismatched parenthesis"
  392. );
  393. typedef detail::shared_matchable<BidiIter> xpr_type;
  394. if(lookahead)
  395. {
  396. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  397. detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure());
  398. seq = detail::make_dynamic<BidiIter>(lam);
  399. }
  400. else if(lookbehind)
  401. {
  402. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  403. detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure());
  404. seq = detail::make_dynamic<BidiIter>(lbm);
  405. }
  406. else if(keeper) // independent sub-expression
  407. {
  408. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  409. detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure());
  410. seq = detail::make_dynamic<BidiIter>(km);
  411. }
  412. // restore the modifiers
  413. this->traits_.flags(old_flags);
  414. return seq;
  415. }
  416. ///////////////////////////////////////////////////////////////////////////
  417. // parse_charset
  418. /// INTERNAL ONLY
  419. template<typename FwdIter>
  420. detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
  421. {
  422. detail::compound_charset<traits_type> chset;
  423. // call out to a helper to actually parse the character set
  424. detail::parse_charset(begin, end, chset, this->traits_);
  425. return detail::make_charset_xpression<BidiIter>
  426. (
  427. chset
  428. , this->rxtraits()
  429. , this->traits_.flags()
  430. );
  431. }
  432. ///////////////////////////////////////////////////////////////////////////
  433. // parse_atom
  434. /// INTERNAL ONLY
  435. template<typename FwdIter>
  436. detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
  437. {
  438. using namespace regex_constants;
  439. escape_value esc = { 0, 0, 0, detail::escape_char };
  440. FwdIter old_begin = begin;
  441. switch(this->traits_.get_token(begin, end))
  442. {
  443. case token_literal:
  444. return detail::make_literal_xpression<BidiIter>
  445. (
  446. this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
  447. );
  448. case token_any:
  449. return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
  450. case token_assert_begin_sequence:
  451. return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
  452. case token_assert_end_sequence:
  453. return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
  454. case token_assert_begin_line:
  455. return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
  456. case token_assert_end_line:
  457. return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
  458. case token_assert_word_boundary:
  459. return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
  460. case token_assert_not_word_boundary:
  461. return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
  462. case token_assert_word_begin:
  463. return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
  464. case token_assert_word_end:
  465. return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
  466. case token_escape:
  467. esc = this->parse_escape(begin, end);
  468. switch(esc.type_)
  469. {
  470. case detail::escape_mark:
  471. return detail::make_backref_xpression<BidiIter>
  472. (
  473. esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
  474. );
  475. case detail::escape_char:
  476. return detail::make_char_xpression<BidiIter>
  477. (
  478. esc.ch_, this->traits_.flags(), this->rxtraits()
  479. );
  480. case detail::escape_class:
  481. return detail::make_posix_charset_xpression<BidiIter>
  482. (
  483. esc.class_
  484. , this->is_upper_(*begin++)
  485. , this->traits_.flags()
  486. , this->rxtraits()
  487. );
  488. }
  489. case token_group_begin:
  490. return this->parse_group(begin, end);
  491. case token_charset_begin:
  492. return this->parse_charset(begin, end);
  493. case token_invalid_quantifier:
  494. BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
  495. break;
  496. case token_quote_meta_begin:
  497. return detail::make_literal_xpression<BidiIter>
  498. (
  499. this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
  500. );
  501. case token_quote_meta_end:
  502. BOOST_THROW_EXCEPTION(
  503. regex_error(
  504. error_escape
  505. , "found quote-meta end without corresponding quote-meta begin"
  506. )
  507. );
  508. break;
  509. case token_end_of_pattern:
  510. break;
  511. default:
  512. begin = old_begin;
  513. break;
  514. }
  515. return detail::sequence<BidiIter>();
  516. }
  517. ///////////////////////////////////////////////////////////////////////////
  518. // parse_quant
  519. /// INTERNAL ONLY
  520. template<typename FwdIter>
  521. detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
  522. {
  523. BOOST_ASSERT(begin != end);
  524. detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
  525. detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
  526. // BUGBUG this doesn't handle the degenerate (?:)+ correctly
  527. if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
  528. {
  529. if(this->traits_.get_quant_spec(begin, end, spec))
  530. {
  531. BOOST_ASSERT(spec.min_ <= spec.max_);
  532. if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
  533. {
  534. seq = this->parse_quant(begin, end);
  535. }
  536. else
  537. {
  538. seq.repeat(spec);
  539. }
  540. }
  541. }
  542. return seq;
  543. }
  544. ///////////////////////////////////////////////////////////////////////////
  545. // parse_sequence
  546. /// INTERNAL ONLY
  547. template<typename FwdIter>
  548. detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
  549. {
  550. detail::sequence<BidiIter> seq;
  551. while(begin != end)
  552. {
  553. detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
  554. // did we find a quantified atom?
  555. if(seq_quant.empty())
  556. break;
  557. // chain it to the end of the xpression sequence
  558. seq += seq_quant;
  559. }
  560. return seq;
  561. }
  562. ///////////////////////////////////////////////////////////////////////////
  563. // parse_literal
  564. // scan ahead looking for char literals to be globbed together into a string literal
  565. /// INTERNAL ONLY
  566. template<typename FwdIter>
  567. string_type parse_literal(FwdIter &begin, FwdIter end)
  568. {
  569. using namespace regex_constants;
  570. BOOST_ASSERT(begin != end);
  571. BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
  572. escape_value esc = { 0, 0, 0, detail::escape_char };
  573. string_type literal(1, *begin);
  574. for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
  575. {
  576. detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
  577. if(this->traits_.get_quant_spec(tmp, end, spec))
  578. {
  579. if(literal.size() != 1)
  580. {
  581. begin = prev;
  582. literal.erase(boost::prior(literal.end()));
  583. }
  584. return literal;
  585. }
  586. else switch(this->traits_.get_token(tmp, end))
  587. {
  588. case token_escape:
  589. esc = this->parse_escape(tmp, end);
  590. if(detail::escape_char != esc.type_) return literal;
  591. literal.insert(literal.end(), esc.ch_);
  592. break;
  593. case token_literal:
  594. literal.insert(literal.end(), *tmp++);
  595. break;
  596. default:
  597. return literal;
  598. }
  599. }
  600. return literal;
  601. }
  602. ///////////////////////////////////////////////////////////////////////////
  603. // parse_quote_meta
  604. // scan ahead looking for char literals to be globbed together into a string literal
  605. /// INTERNAL ONLY
  606. template<typename FwdIter>
  607. string_type parse_quote_meta(FwdIter &begin, FwdIter end)
  608. {
  609. using namespace regex_constants;
  610. FwdIter old_begin = begin, old_end;
  611. while(end != (old_end = begin))
  612. {
  613. switch(this->traits_.get_token(begin, end))
  614. {
  615. case token_quote_meta_end:
  616. return string_type(old_begin, old_end);
  617. case token_escape:
  618. BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
  619. BOOST_FALLTHROUGH;
  620. case token_invalid_quantifier:
  621. case token_literal:
  622. ++begin;
  623. break;
  624. default:
  625. break;
  626. }
  627. }
  628. return string_type(old_begin, begin);
  629. }
  630. ///////////////////////////////////////////////////////////////////////////////
  631. // parse_escape
  632. /// INTERNAL ONLY
  633. template<typename FwdIter>
  634. escape_value parse_escape(FwdIter &begin, FwdIter end)
  635. {
  636. BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
  637. // first, check to see if this can be a backreference
  638. if(0 < this->rxtraits().value(*begin, 10))
  639. {
  640. // Parse at most 3 decimal digits.
  641. FwdIter tmp = begin;
  642. int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
  643. // If the resulting number could conceivably be a backref, then it is.
  644. if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
  645. {
  646. begin = tmp;
  647. escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
  648. return esc;
  649. }
  650. }
  651. // Not a backreference, defer to the parse_escape helper
  652. return detail::parse_escape(begin, end, this->traits_);
  653. }
  654. bool is_upper_(char_type ch) const
  655. {
  656. return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
  657. }
  658. std::size_t mark_count_;
  659. std::size_t hidden_mark_count_;
  660. CompilerTraits traits_;
  661. typename RegexTraits::char_class_type upper_;
  662. shared_ptr<detail::regex_impl<BidiIter> > self_;
  663. std::map<string_type, basic_regex<BidiIter> > rules_;
  664. };
  665. }} // namespace boost::xpressive
  666. #endif