perl_matcher_common.hpp 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef __BORLANDC__
  31. # pragma option push -w-8008 -w-8066
  32. #endif
  33. #ifdef BOOST_MSVC
  34. # pragma warning(push)
  35. #if BOOST_MSVC < 1910
  36. #pragma warning(disable:4800)
  37. #endif
  38. #endif
  39. namespace boost{
  40. namespace BOOST_REGEX_DETAIL_NS{
  41. template <class BidiIterator, class Allocator, class traits>
  42. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  43. {
  44. typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
  45. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  46. if(e.empty())
  47. {
  48. // precondition failure: e is not a valid regex.
  49. std::invalid_argument ex("Invalid regular expression object");
  50. boost::throw_exception(ex);
  51. }
  52. pstate = 0;
  53. m_match_flags = f;
  54. estimate_max_state_count(static_cast<category*>(0));
  55. expression_flag_type re_f = re.flags();
  56. icase = re_f & regex_constants::icase;
  57. if(!(m_match_flags & (match_perl|match_posix)))
  58. {
  59. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  60. m_match_flags |= match_perl;
  61. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  62. m_match_flags |= match_perl;
  63. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  64. m_match_flags |= match_perl;
  65. else
  66. m_match_flags |= match_posix;
  67. }
  68. if(m_match_flags & match_posix)
  69. {
  70. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  71. m_presult = m_temp_match.get();
  72. }
  73. else
  74. m_presult = &m_result;
  75. #ifdef BOOST_REGEX_NON_RECURSIVE
  76. m_stack_base = 0;
  77. m_backup_state = 0;
  78. #elif defined(BOOST_REGEX_RECURSIVE)
  79. m_can_backtrack = true;
  80. m_have_accept = false;
  81. #endif
  82. // find the value to use for matching word boundaries:
  83. m_word_mask = re.get_data().m_word_mask;
  84. // find bitmask to use for matching '.':
  85. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
  86. // Disable match_any if requested in the state machine:
  87. if(e.get_data().m_disable_match_any)
  88. m_match_flags &= regex_constants::match_not_any;
  89. }
  90. template <class BidiIterator, class Allocator, class traits>
  91. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  92. {
  93. //
  94. // How many states should we allow our machine to visit before giving up?
  95. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  96. // where N is the length of the string, and S is the number of states
  97. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  98. // but these take unreasonably amounts of time to bale out in pathological
  99. // cases.
  100. //
  101. // Calculate NS^2 first:
  102. //
  103. static const std::ptrdiff_t k = 100000;
  104. std::ptrdiff_t dist = boost::BOOST_REGEX_DETAIL_NS::distance(base, last);
  105. if(dist == 0)
  106. dist = 1;
  107. std::ptrdiff_t states = re.size();
  108. if(states == 0)
  109. states = 1;
  110. if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states)
  111. {
  112. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  113. return;
  114. }
  115. states *= states;
  116. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  117. {
  118. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  119. return;
  120. }
  121. states *= dist;
  122. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  123. {
  124. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  125. return;
  126. }
  127. states += k;
  128. max_state_count = states;
  129. //
  130. // Now calculate N^2:
  131. //
  132. states = dist;
  133. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  134. {
  135. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  136. return;
  137. }
  138. states *= dist;
  139. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  140. {
  141. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  142. return;
  143. }
  144. states += k;
  145. //
  146. // N^2 can be a very large number indeed, to prevent things getting out
  147. // of control, cap the max states:
  148. //
  149. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  150. states = BOOST_REGEX_MAX_STATE_COUNT;
  151. //
  152. // If (the possibly capped) N^2 is larger than our first estimate,
  153. // use this instead:
  154. //
  155. if(states > max_state_count)
  156. max_state_count = states;
  157. }
  158. template <class BidiIterator, class Allocator, class traits>
  159. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  160. {
  161. // we don't know how long the sequence is:
  162. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  163. }
  #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  //
  // protected_call:
  // Wraps the member function proc of this matcher in a
  // concrete_protected_call object and returns the result of execute().
  //
  template <class BidiIterator, class Allocator, class traits>
  inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
     protected_proc_type proc)
  {
     typedef ::boost::BOOST_REGEX_DETAIL_NS::concrete_protected_call
        <perl_matcher<BidiIterator, Allocator, traits> > guarded_call_type;

     guarded_call_type guarded(this, proc);
     return guarded.execute();
  }
  #endif
  175. template <class BidiIterator, class Allocator, class traits>
  176. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  177. {
  178. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  179. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
  180. #else
  181. return match_imp();
  182. #endif
  183. }
  184. template <class BidiIterator, class Allocator, class traits>
  185. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  186. {
  187. // initialise our stack if we are non-recursive:
  188. #ifdef BOOST_REGEX_NON_RECURSIVE
  189. save_state_init init(&m_stack_base, &m_backup_state);
  190. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  191. #if !defined(BOOST_NO_EXCEPTIONS)
  192. try{
  193. #endif
  194. #endif
  195. // reset our state machine:
  196. position = base;
  197. search_base = base;
  198. state_count = 0;
  199. m_match_flags |= regex_constants::match_all;
  200. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  201. m_presult->set_base(base);
  202. m_presult->set_named_subs(this->re.get_named_subs());
  203. if(m_match_flags & match_posix)
  204. m_result = *m_presult;
  205. verify_options(re.flags(), m_match_flags);
  206. if(0 == match_prefix())
  207. return false;
  208. return (m_result[0].second == last) && (m_result[0].first == base);
  209. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  210. }
  211. catch(...)
  212. {
  213. // unwind all pushed states, apart from anything else this
  214. // ensures that all the states are correctly destructed
  215. // not just the memory freed.
  216. while(unwind(true)){}
  217. throw;
  218. }
  219. #endif
  220. }
  221. template <class BidiIterator, class Allocator, class traits>
  222. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  223. {
  224. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  225. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
  226. #else
  227. return find_imp();
  228. #endif
  229. }
  230. template <class BidiIterator, class Allocator, class traits>
  231. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  232. {
  233. static matcher_proc_type const s_find_vtable[7] =
  234. {
  235. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  236. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  237. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  238. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  239. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  240. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  241. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  242. };
  243. // initialise our stack if we are non-recursive:
  244. #ifdef BOOST_REGEX_NON_RECURSIVE
  245. save_state_init init(&m_stack_base, &m_backup_state);
  246. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  247. #if !defined(BOOST_NO_EXCEPTIONS)
  248. try{
  249. #endif
  250. #endif
  251. state_count = 0;
  252. if((m_match_flags & regex_constants::match_init) == 0)
  253. {
  254. // reset our state machine:
  255. search_base = position = base;
  256. pstate = re.get_first_state();
  257. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  258. m_presult->set_base(base);
  259. m_presult->set_named_subs(this->re.get_named_subs());
  260. m_match_flags |= regex_constants::match_init;
  261. }
  262. else
  263. {
  264. // start again:
  265. search_base = position = m_result[0].second;
  266. // If last match was null and match_not_null was not set then increment
  267. // our start position, otherwise we go into an infinite loop:
  268. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  269. {
  270. if(position == last)
  271. return false;
  272. else
  273. ++position;
  274. }
  275. // reset $` start:
  276. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  277. //if((base != search_base) && (base == backstop))
  278. // m_match_flags |= match_prev_avail;
  279. }
  280. if(m_match_flags & match_posix)
  281. {
  282. m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  283. m_result.set_base(base);
  284. }
  285. verify_options(re.flags(), m_match_flags);
  286. // find out what kind of expression we have:
  287. unsigned type = (m_match_flags & match_continuous) ?
  288. static_cast<unsigned int>(regbase::restart_continue)
  289. : static_cast<unsigned int>(re.get_restart_type());
  290. // call the appropriate search routine:
  291. matcher_proc_type proc = s_find_vtable[type];
  292. return (this->*proc)();
  293. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  294. }
  295. catch(...)
  296. {
  297. // unwind all pushed states, apart from anything else this
  298. // ensures that all the states are correctly destructed
  299. // not just the memory freed.
  300. while(unwind(true)){}
  301. throw;
  302. }
  303. #endif
  304. }
  305. template <class BidiIterator, class Allocator, class traits>
  306. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  307. {
  308. m_has_partial_match = false;
  309. m_has_found_match = false;
  310. pstate = re.get_first_state();
  311. m_presult->set_first(position);
  312. restart = position;
  313. match_all_states();
  314. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  315. {
  316. m_has_found_match = true;
  317. m_presult->set_second(last, 0, false);
  318. position = last;
  319. if((m_match_flags & match_posix) == match_posix)
  320. {
  321. m_result.maybe_assign(*m_presult);
  322. }
  323. }
  324. #ifdef BOOST_REGEX_MATCH_EXTRA
  325. if(m_has_found_match && (match_extra & m_match_flags))
  326. {
  327. //
  328. // we have a match, reverse the capture information:
  329. //
  330. for(unsigned i = 0; i < m_presult->size(); ++i)
  331. {
  332. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  333. std::reverse(seq.begin(), seq.end());
  334. }
  335. }
  336. #endif
  337. if(!m_has_found_match)
  338. position = restart; // reset search postion
  339. #ifdef BOOST_REGEX_RECURSIVE
  340. m_can_backtrack = true; // reset for further searches
  341. #endif
  342. return m_has_found_match;
  343. }
  344. template <class BidiIterator, class Allocator, class traits>
  345. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  346. {
  347. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  348. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  349. //
  350. // compare string with what we stored in
  351. // our records:
  352. for(unsigned int i = 0; i < len; ++i, ++position)
  353. {
  354. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  355. return false;
  356. }
  357. pstate = pstate->next.p;
  358. return true;
  359. }
  360. template <class BidiIterator, class Allocator, class traits>
  361. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  362. {
  363. if(position == backstop)
  364. {
  365. if((m_match_flags & match_prev_avail) == 0)
  366. {
  367. if((m_match_flags & match_not_bol) == 0)
  368. {
  369. pstate = pstate->next.p;
  370. return true;
  371. }
  372. return false;
  373. }
  374. }
  375. else if(m_match_flags & match_single_line)
  376. return false;
  377. // check the previous value character:
  378. BidiIterator t(position);
  379. --t;
  380. if(position != last)
  381. {
  382. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  383. {
  384. pstate = pstate->next.p;
  385. return true;
  386. }
  387. }
  388. else if(is_separator(*t))
  389. {
  390. pstate = pstate->next.p;
  391. return true;
  392. }
  393. return false;
  394. }
  395. template <class BidiIterator, class Allocator, class traits>
  396. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  397. {
  398. if(position != last)
  399. {
  400. if(m_match_flags & match_single_line)
  401. return false;
  402. // we're not yet at the end so *first is always valid:
  403. if(is_separator(*position))
  404. {
  405. if((position != backstop) || (m_match_flags & match_prev_avail))
  406. {
  407. // check that we're not in the middle of \r\n sequence
  408. BidiIterator t(position);
  409. --t;
  410. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  411. {
  412. return false;
  413. }
  414. }
  415. pstate = pstate->next.p;
  416. return true;
  417. }
  418. }
  419. else if((m_match_flags & match_not_eol) == 0)
  420. {
  421. pstate = pstate->next.p;
  422. return true;
  423. }
  424. return false;
  425. }
  426. template <class BidiIterator, class Allocator, class traits>
  427. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  428. {
  429. if(position == last)
  430. return false;
  431. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  432. return false;
  433. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  434. return false;
  435. pstate = pstate->next.p;
  436. ++position;
  437. return true;
  438. }
  439. template <class BidiIterator, class Allocator, class traits>
  440. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  441. {
  442. bool b; // indcates whether next character is a word character
  443. if(position != last)
  444. {
  445. // prev and this character must be opposites:
  446. b = traits_inst.isctype(*position, m_word_mask);
  447. }
  448. else
  449. {
  450. if (m_match_flags & match_not_eow)
  451. return false;
  452. b = false;
  453. }
  454. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  455. {
  456. if(m_match_flags & match_not_bow)
  457. return false;
  458. else
  459. b ^= false;
  460. }
  461. else
  462. {
  463. --position;
  464. b ^= traits_inst.isctype(*position, m_word_mask);
  465. ++position;
  466. }
  467. if(b)
  468. {
  469. pstate = pstate->next.p;
  470. return true;
  471. }
  472. return false; // no match if we get to here...
  473. }
  474. template <class BidiIterator, class Allocator, class traits>
  475. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  476. {
  477. if(position == last)
  478. return false;
  479. // both prev and this character must be m_word_mask:
  480. bool prev = traits_inst.isctype(*position, m_word_mask);
  481. {
  482. bool b;
  483. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  484. return false;
  485. else
  486. {
  487. --position;
  488. b = traits_inst.isctype(*position, m_word_mask);
  489. ++position;
  490. }
  491. if(b == prev)
  492. {
  493. pstate = pstate->next.p;
  494. return true;
  495. }
  496. }
  497. return false;
  498. }
  499. template <class BidiIterator, class Allocator, class traits>
  500. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  501. {
  502. if(position == last)
  503. return false; // can't be starting a word if we're already at the end of input
  504. if(!traits_inst.isctype(*position, m_word_mask))
  505. return false; // next character isn't a word character
  506. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  507. {
  508. if(m_match_flags & match_not_bow)
  509. return false; // no previous input
  510. }
  511. else
  512. {
  513. // otherwise inside buffer:
  514. BidiIterator t(position);
  515. --t;
  516. if(traits_inst.isctype(*t, m_word_mask))
  517. return false; // previous character not non-word
  518. }
  519. // OK we have a match:
  520. pstate = pstate->next.p;
  521. return true;
  522. }
  523. template <class BidiIterator, class Allocator, class traits>
  524. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  525. {
  526. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  527. return false; // start of buffer can't be end of word
  528. BidiIterator t(position);
  529. --t;
  530. if(traits_inst.isctype(*t, m_word_mask) == false)
  531. return false; // previous character wasn't a word character
  532. if(position == last)
  533. {
  534. if(m_match_flags & match_not_eow)
  535. return false; // end of buffer but not end of word
  536. }
  537. else
  538. {
  539. // otherwise inside buffer:
  540. if(traits_inst.isctype(*position, m_word_mask))
  541. return false; // next character is a word character
  542. }
  543. pstate = pstate->next.p;
  544. return true; // if we fall through to here then we've succeeded
  545. }
  546. template <class BidiIterator, class Allocator, class traits>
  547. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  548. {
  549. if((position != backstop) || (m_match_flags & match_not_bob))
  550. return false;
  551. // OK match:
  552. pstate = pstate->next.p;
  553. return true;
  554. }
  555. template <class BidiIterator, class Allocator, class traits>
  556. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  557. {
  558. if((position != last) || (m_match_flags & match_not_eob))
  559. return false;
  560. // OK match:
  561. pstate = pstate->next.p;
  562. return true;
  563. }
  564. template <class BidiIterator, class Allocator, class traits>
  565. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  566. {
  567. //
  568. // Compare with what we previously matched.
  569. // Note that this succeeds if the backref did not partisipate
  570. // in the match, this is in line with ECMAScript, but not Perl
  571. // or PCRE.
  572. //
  573. int index = static_cast<const re_brace*>(pstate)->index;
  574. if(index >= 10000)
  575. {
  576. named_subexpressions::range_type r = re.get_data().equal_range(index);
  577. BOOST_ASSERT(r.first != r.second);
  578. do
  579. {
  580. index = r.first->index;
  581. ++r.first;
  582. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  583. }
  584. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  585. return false;
  586. BidiIterator i = (*m_presult)[index].first;
  587. BidiIterator j = (*m_presult)[index].second;
  588. while(i != j)
  589. {
  590. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  591. return false;
  592. ++i;
  593. ++position;
  594. }
  595. pstate = pstate->next.p;
  596. return true;
  597. }
  598. template <class BidiIterator, class Allocator, class traits>
  599. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  600. {
  601. typedef typename traits::char_class_type char_class_type;
  602. // let the traits class do the work:
  603. if(position == last)
  604. return false;
  605. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  606. if(t != position)
  607. {
  608. pstate = pstate->next.p;
  609. position = t;
  610. return true;
  611. }
  612. return false;
  613. }
  614. template <class BidiIterator, class Allocator, class traits>
  615. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  616. {
  617. if(position == last)
  618. return false;
  619. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  620. {
  621. pstate = pstate->next.p;
  622. ++position;
  623. return true;
  624. }
  625. return false;
  626. }
  627. template <class BidiIterator, class Allocator, class traits>
  628. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  629. {
  630. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  631. return true;
  632. }
  633. template <class BidiIterator, class Allocator, class traits>
  634. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  635. {
  636. if(position == last)
  637. return false;
  638. if(is_combining(traits_inst.translate(*position, icase)))
  639. return false;
  640. ++position;
  641. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  642. ++position;
  643. pstate = pstate->next.p;
  644. return true;
  645. }
  646. template <class BidiIterator, class Allocator, class traits>
  647. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  648. {
  649. if(m_match_flags & match_not_eob)
  650. return false;
  651. BidiIterator p(position);
  652. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  653. if(p != last)
  654. return false;
  655. pstate = pstate->next.p;
  656. return true;
  657. }
  658. template <class BidiIterator, class Allocator, class traits>
  659. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  660. {
  661. if(position == search_base)
  662. {
  663. pstate = pstate->next.p;
  664. return true;
  665. }
  666. return false;
  667. }
  668. template <class BidiIterator, class Allocator, class traits>
  669. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  670. {
  671. #ifdef BOOST_MSVC
  672. #pragma warning(push)
  673. #pragma warning(disable:4127)
  674. #endif
  675. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  676. {
  677. std::ptrdiff_t maxlen = ::boost::BOOST_REGEX_DETAIL_NS::distance(backstop, position);
  678. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  679. return false;
  680. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  681. }
  682. else
  683. {
  684. int c = static_cast<const re_brace*>(pstate)->index;
  685. while(c--)
  686. {
  687. if(position == backstop)
  688. return false;
  689. --position;
  690. }
  691. }
  692. pstate = pstate->next.p;
  693. return true;
  694. #ifdef BOOST_MSVC
  695. #pragma warning(pop)
  696. #endif
  697. }
  698. template <class BidiIterator, class Allocator, class traits>
  699. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  700. {
  701. // return true if marked sub-expression N has been matched:
  702. int index = static_cast<const re_brace*>(pstate)->index;
  703. bool result = false;
  704. if(index == 9999)
  705. {
  706. // Magic value for a (DEFINE) block:
  707. return false;
  708. }
  709. else if(index > 0)
  710. {
  711. // Have we matched subexpression "index"?
  712. // Check if index is a hash value:
  713. if(index >= 10000)
  714. {
  715. named_subexpressions::range_type r = re.get_data().equal_range(index);
  716. while(r.first != r.second)
  717. {
  718. if((*m_presult)[r.first->index].matched)
  719. {
  720. result = true;
  721. break;
  722. }
  723. ++r.first;
  724. }
  725. }
  726. else
  727. {
  728. result = (*m_presult)[index].matched;
  729. }
  730. pstate = pstate->next.p;
  731. }
  732. else
  733. {
  734. // Have we recursed into subexpression "index"?
  735. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  736. int idx = -(index+1);
  737. if(idx >= 10000)
  738. {
  739. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  740. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  741. while(r.first != r.second)
  742. {
  743. result |= (stack_index == r.first->index);
  744. if(result)break;
  745. ++r.first;
  746. }
  747. }
  748. else
  749. {
  750. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  751. }
  752. pstate = pstate->next.p;
  753. }
  754. return result;
  755. }
  756. template <class BidiIterator, class Allocator, class traits>
  757. bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
  758. {
  759. // Just force a backtrack:
  760. return false;
  761. }
  762. template <class BidiIterator, class Allocator, class traits>
  763. bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
  764. {
  765. if(!recursion_stack.empty())
  766. {
  767. return skip_until_paren(recursion_stack.back().idx);
  768. }
  769. else
  770. {
  771. return skip_until_paren(INT_MAX);
  772. }
  773. }
  774. template <class BidiIterator, class Allocator, class traits>
  775. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  776. {
  777. #ifdef BOOST_MSVC
  778. #pragma warning(push)
  779. #pragma warning(disable:4127)
  780. #endif
  781. const unsigned char* _map = re.get_map();
  782. while(true)
  783. {
  784. // skip everything we can't match:
  785. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  786. ++position;
  787. if(position == last)
  788. {
  789. // run out of characters, try a null match if possible:
  790. if(re.can_be_null())
  791. return match_prefix();
  792. break;
  793. }
  794. // now try and obtain a match:
  795. if(match_prefix())
  796. return true;
  797. if(position == last)
  798. return false;
  799. ++position;
  800. }
  801. return false;
  802. #ifdef BOOST_MSVC
  803. #pragma warning(pop)
  804. #endif
  805. }
  806. template <class BidiIterator, class Allocator, class traits>
  807. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  808. {
  809. #ifdef BOOST_MSVC
  810. #pragma warning(push)
  811. #pragma warning(disable:4127)
  812. #endif
  813. // do search optimised for word starts:
  814. const unsigned char* _map = re.get_map();
  815. if((m_match_flags & match_prev_avail) || (position != base))
  816. --position;
  817. else if(match_prefix())
  818. return true;
  819. do
  820. {
  821. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  822. ++position;
  823. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  824. ++position;
  825. if(position == last)
  826. break;
  827. if(can_start(*position, _map, (unsigned char)mask_any) )
  828. {
  829. if(match_prefix())
  830. return true;
  831. }
  832. if(position == last)
  833. break;
  834. } while(true);
  835. return false;
  836. #ifdef BOOST_MSVC
  837. #pragma warning(pop)
  838. #endif
  839. }
  840. template <class BidiIterator, class Allocator, class traits>
  841. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  842. {
  843. // do search optimised for line starts:
  844. const unsigned char* _map = re.get_map();
  845. if(match_prefix())
  846. return true;
  847. while(position != last)
  848. {
  849. while((position != last) && !is_separator(*position))
  850. ++position;
  851. if(position == last)
  852. return false;
  853. ++position;
  854. if(position == last)
  855. {
  856. if(re.can_be_null() && match_prefix())
  857. return true;
  858. return false;
  859. }
  860. if( can_start(*position, _map, (unsigned char)mask_any) )
  861. {
  862. if(match_prefix())
  863. return true;
  864. }
  865. if(position == last)
  866. return false;
  867. //++position;
  868. }
  869. return false;
  870. }
  871. template <class BidiIterator, class Allocator, class traits>
  872. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  873. {
  874. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  875. return match_prefix();
  876. return false;
  877. }
  878. template <class BidiIterator, class Allocator, class traits>
  879. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  880. {
  881. #if 0
  882. if(position == last)
  883. return false; // can't possibly match if we're at the end already
  884. unsigned type = (m_match_flags & match_continuous) ?
  885. static_cast<unsigned int>(regbase::restart_continue)
  886. : static_cast<unsigned int>(re.get_restart_type());
  887. const kmp_info<char_type>* info = access::get_kmp(re);
  888. int len = info->len;
  889. const char_type* x = info->pstr;
  890. int j = 0;
  891. while (position != last)
  892. {
  893. while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
  894. j = info->kmp_next[j];
  895. ++position;
  896. ++j;
  897. if(j >= len)
  898. {
  899. if(type == regbase::restart_fixed_lit)
  900. {
  901. std::advance(position, -j);
  902. restart = position;
  903. std::advance(restart, len);
  904. m_result.set_first(position);
  905. m_result.set_second(restart);
  906. position = restart;
  907. return true;
  908. }
  909. else
  910. {
  911. restart = position;
  912. std::advance(position, -j);
  913. if(match_prefix())
  914. return true;
  915. else
  916. {
  917. for(int k = 0; (restart != position) && (k < j); ++k, --restart)
  918. {} // dwa 10/20/2000 - warning suppression for MWCW
  919. if(restart != last)
  920. ++restart;
  921. position = restart;
  922. j = 0; //we could do better than this...
  923. }
  924. }
  925. }
  926. }
  927. if((m_match_flags & match_partial) && (position == last) && j)
  928. {
  929. // we need to check for a partial match:
  930. restart = position;
  931. std::advance(position, -j);
  932. return match_prefix();
  933. }
  934. #endif
  935. return false;
  936. }
  937. } // namespace BOOST_REGEX_DETAIL_NS
  938. } // namespace boost
  939. #ifdef BOOST_MSVC
  940. # pragma warning(pop)
  941. #endif
  942. #ifdef __BORLANDC__
  943. # pragma option pop
  944. #endif
  945. #ifdef BOOST_MSVC
  946. #pragma warning(push)
  947. #pragma warning(disable: 4103)
  948. #endif
  949. #ifdef BOOST_HAS_ABI_HEADERS
  950. # include BOOST_ABI_SUFFIX
  951. #endif
  952. #ifdef BOOST_MSVC
  953. #pragma warning(pop)
  954. #endif
  955. #endif