perl_matcher_recursive.hpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * specific to the recursive implementation.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef BOOST_MSVC
  31. #pragma warning(push)
  32. #pragma warning(disable: 4800)
  33. #endif
  34. namespace boost{
  35. namespace BOOST_REGEX_DETAIL_NS{
  36. template <class BidiIterator>
  37. class backup_subex
  38. {
  39. int index;
  40. sub_match<BidiIterator> sub;
  41. public:
  42. template <class A>
  43. backup_subex(const match_results<BidiIterator, A>& w, int i)
  44. : index(i), sub(w[i], false) {}
  45. template <class A>
  46. void restore(match_results<BidiIterator, A>& w)
  47. {
  48. w.set_first(sub.first, index, index == 0);
  49. w.set_second(sub.second, index, sub.matched, index == 0);
  50. }
  51. const sub_match<BidiIterator>& get() { return sub; }
  52. };
  53. template <class BidiIterator, class Allocator, class traits>
  54. bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
  55. {
  56. static matcher_proc_type const s_match_vtable[34] =
  57. {
  58. (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
  59. &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
  60. &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
  61. &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
  62. &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
  63. &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
  64. &perl_matcher<BidiIterator, Allocator, traits>::match_match,
  65. &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
  66. &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
  67. &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
  68. &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
  69. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
  70. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
  71. &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
  72. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
  73. &perl_matcher<BidiIterator, Allocator, traits>::match_set,
  74. &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
  75. &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
  76. &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
  77. &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
  78. &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
  79. &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
  80. // Although this next line *should* be evaluated at compile time, in practice
  81. // some compilers (VC++) emit run-time initialisation which breaks thread
  82. // safety, so use a dispatch function instead:
  83. //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
  84. &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
  85. &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
  86. &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
  87. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
  88. &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
  89. &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
  90. &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
  91. &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
  92. &perl_matcher<BidiIterator, Allocator, traits>::match_fail,
  93. &perl_matcher<BidiIterator, Allocator, traits>::match_accept,
  94. &perl_matcher<BidiIterator, Allocator, traits>::match_commit,
  95. &perl_matcher<BidiIterator, Allocator, traits>::match_then,
  96. };
  97. if(state_count > max_state_count)
  98. raise_error(traits_inst, regex_constants::error_complexity);
  99. while(pstate)
  100. {
  101. matcher_proc_type proc = s_match_vtable[pstate->type];
  102. ++state_count;
  103. if(!(this->*proc)())
  104. {
  105. if((m_match_flags & match_partial) && (position == last) && (position != search_base))
  106. m_has_partial_match = true;
  107. return 0;
  108. }
  109. }
  110. return true;
  111. }
  112. template <class BidiIterator, class Allocator, class traits>
  113. bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
  114. {
  115. int index = static_cast<const re_brace*>(pstate)->index;
  116. icase = static_cast<const re_brace*>(pstate)->icase;
  117. bool r = true;
  118. switch(index)
  119. {
  120. case 0:
  121. pstate = pstate->next.p;
  122. break;
  123. case -1:
  124. case -2:
  125. {
  126. // forward lookahead assert:
  127. BidiIterator old_position(position);
  128. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  129. pstate = pstate->next.p->next.p;
  130. r = match_all_states();
  131. pstate = next_pstate;
  132. position = old_position;
  133. if((r && (index != -1)) || (!r && (index != -2)))
  134. r = false;
  135. else
  136. r = true;
  137. if(r && m_have_accept)
  138. r = skip_until_paren(INT_MAX);
  139. break;
  140. }
  141. case -3:
  142. {
  143. // independent sub-expression:
  144. bool old_independent = m_independent;
  145. m_independent = true;
  146. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  147. pstate = pstate->next.p->next.p;
  148. bool can_backtrack = m_can_backtrack;
  149. r = match_all_states();
  150. if(r)
  151. m_can_backtrack = can_backtrack;
  152. pstate = next_pstate;
  153. m_independent = old_independent;
  154. #ifdef BOOST_REGEX_MATCH_EXTRA
  155. if(r && (m_match_flags & match_extra))
  156. {
  157. //
  158. // our captures have been stored in *m_presult
  159. // we need to unpack them, and insert them
  160. // back in the right order when we unwind the stack:
  161. //
  162. unsigned i;
  163. match_results<BidiIterator, Allocator> tm(*m_presult);
  164. for(i = 0; i < tm.size(); ++i)
  165. (*m_presult)[i].get_captures().clear();
  166. // match everything else:
  167. r = match_all_states();
  168. // now place the stored captures back:
  169. for(i = 0; i < tm.size(); ++i)
  170. {
  171. typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
  172. seq& s1 = (*m_presult)[i].get_captures();
  173. const seq& s2 = tm[i].captures();
  174. s1.insert(
  175. s1.end(),
  176. s2.begin(),
  177. s2.end());
  178. }
  179. }
  180. #endif
  181. if(r && m_have_accept)
  182. r = skip_until_paren(INT_MAX);
  183. break;
  184. }
  185. case -4:
  186. {
  187. // conditional expression:
  188. const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
  189. BOOST_ASSERT(alt->type == syntax_element_alt);
  190. pstate = alt->next.p;
  191. if(pstate->type == syntax_element_assert_backref)
  192. {
  193. if(!match_assert_backref())
  194. pstate = alt->alt.p;
  195. break;
  196. }
  197. else
  198. {
  199. // zero width assertion, have to match this recursively:
  200. BOOST_ASSERT(pstate->type == syntax_element_startmark);
  201. bool negated = static_cast<const re_brace*>(pstate)->index == -2;
  202. BidiIterator saved_position = position;
  203. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  204. pstate = pstate->next.p->next.p;
  205. bool res = match_all_states();
  206. position = saved_position;
  207. if(negated)
  208. res = !res;
  209. if(res)
  210. pstate = next_pstate;
  211. else
  212. pstate = alt->alt.p;
  213. break;
  214. }
  215. }
  216. case -5:
  217. {
  218. // Reset start of $0, since we have a \K escape
  219. backup_subex<BidiIterator> sub(*m_presult, 0);
  220. m_presult->set_first(position, 0, true);
  221. pstate = pstate->next.p;
  222. r = match_all_states();
  223. if(r == false)
  224. sub.restore(*m_presult);
  225. break;
  226. }
  227. default:
  228. {
  229. BOOST_ASSERT(index > 0);
  230. if((m_match_flags & match_nosubs) == 0)
  231. {
  232. backup_subex<BidiIterator> sub(*m_presult, index);
  233. m_presult->set_first(position, index);
  234. pstate = pstate->next.p;
  235. r = match_all_states();
  236. if(r == false)
  237. sub.restore(*m_presult);
  238. #ifdef BOOST_REGEX_MATCH_EXTRA
  239. //
  240. // we have a match, push the capture information onto the stack:
  241. //
  242. else if(sub.get().matched && (match_extra & m_match_flags))
  243. ((*m_presult)[index]).get_captures().push_back(sub.get());
  244. #endif
  245. }
  246. else
  247. {
  248. pstate = pstate->next.p;
  249. }
  250. break;
  251. }
  252. }
  253. return r;
  254. }
  255. template <class BidiIterator, class Allocator, class traits>
  256. bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
  257. {
  258. bool take_first, take_second;
  259. const re_alt* jmp = static_cast<const re_alt*>(pstate);
  260. // find out which of these two alternatives we need to take:
  261. if(position == last)
  262. {
  263. take_first = jmp->can_be_null & mask_take;
  264. take_second = jmp->can_be_null & mask_skip;
  265. }
  266. else
  267. {
  268. take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
  269. take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
  270. }
  271. if(take_first)
  272. {
  273. // we can take the first alternative,
  274. // see if we need to push next alternative:
  275. if(take_second)
  276. {
  277. BidiIterator oldposition(position);
  278. const re_syntax_base* old_pstate = jmp->alt.p;
  279. pstate = pstate->next.p;
  280. bool oldcase = icase;
  281. m_have_then = false;
  282. if(!match_all_states())
  283. {
  284. pstate = old_pstate;
  285. position = oldposition;
  286. icase = oldcase;
  287. if(m_have_then)
  288. {
  289. m_can_backtrack = true;
  290. m_have_then = false;
  291. return false;
  292. }
  293. }
  294. m_have_then = false;
  295. return m_can_backtrack;
  296. }
  297. pstate = pstate->next.p;
  298. return true;
  299. }
  300. if(take_second)
  301. {
  302. pstate = jmp->alt.p;
  303. return true;
  304. }
  305. return false; // neither option is possible
  306. }
  307. template <class BidiIterator, class Allocator, class traits>
  308. bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
  309. {
  310. #ifdef BOOST_MSVC
  311. #pragma warning(push)
  312. #pragma warning(disable:4127 4244)
  313. #endif
  314. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  315. //
  316. // Always copy the repeat count, so that the state is restored
  317. // when we exit this scope:
  318. //
  319. repeater_count<BidiIterator> r(rep->state_id, &next_count, position, this->recursion_stack.size() ? this->recursion_stack.back().idx : INT_MIN + 3);
  320. //
  321. // If we've had at least one repeat already, and the last one
  322. // matched the NULL string then set the repeat count to
  323. // maximum:
  324. //
  325. next_count->check_null_repeat(position, rep->max);
  326. // find out which of these two alternatives we need to take:
  327. bool take_first, take_second;
  328. if(position == last)
  329. {
  330. take_first = rep->can_be_null & mask_take;
  331. take_second = rep->can_be_null & mask_skip;
  332. }
  333. else
  334. {
  335. take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
  336. take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
  337. }
  338. if(next_count->get_count() < rep->min)
  339. {
  340. // we must take the repeat:
  341. if(take_first)
  342. {
  343. // increase the counter:
  344. ++(*next_count);
  345. pstate = rep->next.p;
  346. return match_all_states();
  347. }
  348. return false;
  349. }
  350. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  351. if(greedy)
  352. {
  353. // try and take the repeat if we can:
  354. if((next_count->get_count() < rep->max) && take_first)
  355. {
  356. // store position in case we fail:
  357. BidiIterator pos = position;
  358. // increase the counter:
  359. ++(*next_count);
  360. pstate = rep->next.p;
  361. if(match_all_states())
  362. return true;
  363. if(!m_can_backtrack)
  364. return false;
  365. // failed repeat, reset posistion and fall through for alternative:
  366. position = pos;
  367. }
  368. if(take_second)
  369. {
  370. pstate = rep->alt.p;
  371. return true;
  372. }
  373. return false; // can't take anything, fail...
  374. }
  375. else // non-greedy
  376. {
  377. // try and skip the repeat if we can:
  378. if(take_second)
  379. {
  380. // store position in case we fail:
  381. BidiIterator pos = position;
  382. pstate = rep->alt.p;
  383. if(match_all_states())
  384. return true;
  385. if(!m_can_backtrack)
  386. return false;
  387. // failed alternative, reset posistion and fall through for repeat:
  388. position = pos;
  389. }
  390. if((next_count->get_count() < rep->max) && take_first)
  391. {
  392. // increase the counter:
  393. ++(*next_count);
  394. pstate = rep->next.p;
  395. return match_all_states();
  396. }
  397. }
  398. return false;
  399. #ifdef BOOST_MSVC
  400. #pragma warning(pop)
  401. #endif
  402. }
  403. template <class BidiIterator, class Allocator, class traits>
  404. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
  405. {
  406. #ifdef BOOST_MSVC
  407. #pragma warning(push)
  408. #pragma warning(disable:4127)
  409. #endif
  410. std::size_t count = 0;
  411. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  412. re_syntax_base* psingle = rep->next.p;
  413. // match compulsary repeats first:
  414. while(count < rep->min)
  415. {
  416. pstate = psingle;
  417. if(!match_wild())
  418. return false;
  419. ++count;
  420. }
  421. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  422. if(greedy)
  423. {
  424. // normal repeat:
  425. while(count < rep->max)
  426. {
  427. pstate = psingle;
  428. if(!match_wild())
  429. break;
  430. ++count;
  431. }
  432. if((rep->leading) && (count < rep->max))
  433. restart = position;
  434. pstate = rep;
  435. return backtrack_till_match(count - rep->min);
  436. }
  437. else
  438. {
  439. // non-greedy, keep trying till we get a match:
  440. BidiIterator save_pos;
  441. do
  442. {
  443. if((rep->leading) && (rep->max == UINT_MAX))
  444. restart = position;
  445. pstate = rep->alt.p;
  446. save_pos = position;
  447. ++state_count;
  448. if(match_all_states())
  449. return true;
  450. if((count >= rep->max) || !m_can_backtrack)
  451. return false;
  452. ++count;
  453. pstate = psingle;
  454. position = save_pos;
  455. if(!match_wild())
  456. return false;
  457. }while(true);
  458. }
  459. #ifdef BOOST_MSVC
  460. #pragma warning(pop)
  461. #endif
  462. }
  463. template <class BidiIterator, class Allocator, class traits>
  464. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
  465. {
  466. #ifdef BOOST_MSVC
  467. #pragma warning(push)
  468. #pragma warning(disable:4127)
  469. #endif
  470. if(m_match_flags & match_not_dot_null)
  471. return match_dot_repeat_slow();
  472. if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
  473. return match_dot_repeat_slow();
  474. //
  475. // start by working out how much we can skip:
  476. //
  477. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  478. #ifdef BOOST_MSVC
  479. #pragma warning(push)
  480. #pragma warning(disable:4267)
  481. #endif
  482. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  483. std::size_t count = (std::min)(static_cast<std::size_t>(::boost::BOOST_REGEX_DETAIL_NS::distance(position, last)), greedy ? rep->max : rep->min);
  484. if(rep->min > count)
  485. {
  486. position = last;
  487. return false; // not enough text left to match
  488. }
  489. std::advance(position, count);
  490. #ifdef BOOST_MSVC
  491. #pragma warning(pop)
  492. #endif
  493. if((rep->leading) && (count < rep->max) && greedy)
  494. restart = position;
  495. if(greedy)
  496. return backtrack_till_match(count - rep->min);
  497. // non-greedy, keep trying till we get a match:
  498. BidiIterator save_pos;
  499. do
  500. {
  501. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  502. {
  503. ++position;
  504. ++count;
  505. }
  506. if((rep->leading) && (rep->max == UINT_MAX))
  507. restart = position;
  508. pstate = rep->alt.p;
  509. save_pos = position;
  510. ++state_count;
  511. if(match_all_states())
  512. return true;
  513. if((count >= rep->max) || !m_can_backtrack)
  514. return false;
  515. if(save_pos == last)
  516. return false;
  517. position = ++save_pos;
  518. ++count;
  519. }while(true);
  520. #ifdef BOOST_MSVC
  521. #pragma warning(pop)
  522. #endif
  523. }
  524. template <class BidiIterator, class Allocator, class traits>
  525. bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
  526. {
  527. #ifdef BOOST_MSVC
  528. #pragma warning(push)
  529. #pragma warning(disable:4127)
  530. #pragma warning(disable:4267)
  531. #endif
  532. #ifdef __BORLANDC__
  533. #pragma option push -w-8008 -w-8066 -w-8004
  534. #endif
  535. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  536. BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
  537. const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
  538. //
  539. // start by working out how much we can skip:
  540. //
  541. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  542. std::size_t count, desired;
  543. if(::boost::is_random_access_iterator<BidiIterator>::value)
  544. {
  545. desired =
  546. (std::min)(
  547. (std::size_t)(greedy ? rep->max : rep->min),
  548. (std::size_t)::boost::BOOST_REGEX_DETAIL_NS::distance(position, last));
  549. count = desired;
  550. ++desired;
  551. if(icase)
  552. {
  553. while(--desired && (traits_inst.translate_nocase(*position) == what))
  554. {
  555. ++position;
  556. }
  557. }
  558. else
  559. {
  560. while(--desired && (traits_inst.translate(*position) == what))
  561. {
  562. ++position;
  563. }
  564. }
  565. count = count - desired;
  566. }
  567. else
  568. {
  569. count = 0;
  570. desired = greedy ? rep->max : rep->min;
  571. while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
  572. {
  573. ++position;
  574. ++count;
  575. }
  576. }
  577. if((rep->leading) && (count < rep->max) && greedy)
  578. restart = position;
  579. if(count < rep->min)
  580. return false;
  581. if(greedy)
  582. return backtrack_till_match(count - rep->min);
  583. // non-greedy, keep trying till we get a match:
  584. BidiIterator save_pos;
  585. do
  586. {
  587. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  588. {
  589. if((traits_inst.translate(*position, icase) == what))
  590. {
  591. ++position;
  592. ++count;
  593. }
  594. else
  595. return false; // counldn't repeat even though it was the only option
  596. }
  597. if((rep->leading) && (rep->max == UINT_MAX))
  598. restart = position;
  599. pstate = rep->alt.p;
  600. save_pos = position;
  601. ++state_count;
  602. if(match_all_states())
  603. return true;
  604. if((count >= rep->max) || !m_can_backtrack)
  605. return false;
  606. position = save_pos;
  607. if(position == last)
  608. return false;
  609. if(traits_inst.translate(*position, icase) == what)
  610. {
  611. ++position;
  612. ++count;
  613. }
  614. else
  615. {
  616. return false;
  617. }
  618. }while(true);
  619. #ifdef __BORLANDC__
  620. #pragma option pop
  621. #endif
  622. #ifdef BOOST_MSVC
  623. #pragma warning(pop)
  624. #endif
  625. }
  626. template <class BidiIterator, class Allocator, class traits>
  627. bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
  628. {
  629. #ifdef BOOST_MSVC
  630. #pragma warning(push)
  631. #pragma warning(disable:4127)
  632. #endif
  633. #ifdef __BORLANDC__
  634. #pragma option push -w-8008 -w-8066 -w-8004
  635. #endif
  636. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  637. const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
  638. std::size_t count = 0;
  639. //
  640. // start by working out how much we can skip:
  641. //
  642. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  643. std::size_t desired = greedy ? rep->max : rep->min;
  644. if(::boost::is_random_access_iterator<BidiIterator>::value)
  645. {
  646. BidiIterator end = position;
  647. // Move end forward by "desired", preferably without using distance or advance if we can
  648. // as these can be slow for some iterator types.
  649. std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last);
  650. if(desired >= len)
  651. end = last;
  652. else
  653. std::advance(end, desired);
  654. BidiIterator origin(position);
  655. while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  656. {
  657. ++position;
  658. }
  659. count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position);
  660. }
  661. else
  662. {
  663. while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  664. {
  665. ++position;
  666. ++count;
  667. }
  668. }
  669. if((rep->leading) && (count < rep->max) && greedy)
  670. restart = position;
  671. if(count < rep->min)
  672. return false;
  673. if(greedy)
  674. return backtrack_till_match(count - rep->min);
  675. // non-greedy, keep trying till we get a match:
  676. BidiIterator save_pos;
  677. do
  678. {
  679. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  680. {
  681. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  682. {
  683. ++position;
  684. ++count;
  685. }
  686. else
  687. return false; // counldn't repeat even though it was the only option
  688. }
  689. if((rep->leading) && (rep->max == UINT_MAX))
  690. restart = position;
  691. pstate = rep->alt.p;
  692. save_pos = position;
  693. ++state_count;
  694. if(match_all_states())
  695. return true;
  696. if((count >= rep->max) || !m_can_backtrack)
  697. return false;
  698. position = save_pos;
  699. if(position == last)
  700. return false;
  701. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  702. {
  703. ++position;
  704. ++count;
  705. }
  706. else
  707. {
  708. return false;
  709. }
  710. }while(true);
  711. #ifdef __BORLANDC__
  712. #pragma option pop
  713. #endif
  714. #ifdef BOOST_MSVC
  715. #pragma warning(pop)
  716. #endif
  717. }
  718. template <class BidiIterator, class Allocator, class traits>
  719. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
  720. {
  721. #ifdef BOOST_MSVC
  722. #pragma warning(push)
  723. #pragma warning(disable:4127)
  724. #endif
  725. #ifdef __BORLANDC__
  726. #pragma option push -w-8008 -w-8066 -w-8004
  727. #endif
  728. typedef typename traits::char_class_type char_class_type;
  729. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  730. const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
  731. std::size_t count = 0;
  732. //
  733. // start by working out how much we can skip:
  734. //
  735. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  736. std::size_t desired = greedy ? rep->max : rep->min;
  737. if(::boost::is_random_access_iterator<BidiIterator>::value)
  738. {
  739. BidiIterator end = position;
  740. // Move end forward by "desired", preferably without using distance or advance if we can
  741. // as these can be slow for some iterator types.
  742. std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last);
  743. if(desired >= len)
  744. end = last;
  745. else
  746. std::advance(end, desired);
  747. BidiIterator origin(position);
  748. while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  749. {
  750. ++position;
  751. }
  752. count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position);
  753. }
  754. else
  755. {
  756. while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  757. {
  758. ++position;
  759. ++count;
  760. }
  761. }
  762. if((rep->leading) && (count < rep->max) && greedy)
  763. restart = position;
  764. if(count < rep->min)
  765. return false;
  766. if(greedy)
  767. return backtrack_till_match(count - rep->min);
  768. // non-greedy, keep trying till we get a match:
  769. BidiIterator save_pos;
  770. do
  771. {
  772. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  773. {
  774. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  775. {
  776. ++position;
  777. ++count;
  778. }
  779. else
  780. return false; // counldn't repeat even though it was the only option
  781. }
  782. if((rep->leading) && (rep->max == UINT_MAX))
  783. restart = position;
  784. pstate = rep->alt.p;
  785. save_pos = position;
  786. ++state_count;
  787. if(match_all_states())
  788. return true;
  789. if((count >= rep->max) || !m_can_backtrack)
  790. return false;
  791. position = save_pos;
  792. if(position == last)
  793. return false;
  794. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  795. {
  796. ++position;
  797. ++count;
  798. }
  799. else
  800. {
  801. return false;
  802. }
  803. }while(true);
  804. #ifdef __BORLANDC__
  805. #pragma option pop
  806. #endif
  807. #ifdef BOOST_MSVC
  808. #pragma warning(pop)
  809. #endif
  810. }
  811. template <class BidiIterator, class Allocator, class traits>
  812. bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
  813. {
  814. #ifdef BOOST_MSVC
  815. #pragma warning(push)
  816. #pragma warning(disable:4127)
  817. #endif
  818. if(!m_can_backtrack)
  819. return false;
  820. if((m_match_flags & match_partial) && (position == last))
  821. m_has_partial_match = true;
  822. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  823. BidiIterator backtrack = position;
  824. if(position == last)
  825. {
  826. if(rep->can_be_null & mask_skip)
  827. {
  828. pstate = rep->alt.p;
  829. if(match_all_states())
  830. return true;
  831. }
  832. if(count)
  833. {
  834. position = --backtrack;
  835. --count;
  836. }
  837. else
  838. return false;
  839. }
  840. do
  841. {
  842. while(count && !can_start(*position, rep->_map, mask_skip))
  843. {
  844. --position;
  845. --count;
  846. ++state_count;
  847. }
  848. pstate = rep->alt.p;
  849. backtrack = position;
  850. if(match_all_states())
  851. return true;
  852. if(count == 0)
  853. return false;
  854. position = --backtrack;
  855. ++state_count;
  856. --count;
  857. }while(true);
  858. #ifdef BOOST_MSVC
  859. #pragma warning(pop)
  860. #endif
  861. }
  862. template <class BidiIterator, class Allocator, class traits>
  863. bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
  864. {
  865. BOOST_ASSERT(pstate->type == syntax_element_recurse);
  866. //
  867. // Set new call stack:
  868. //
  869. if(recursion_stack.capacity() == 0)
  870. {
  871. recursion_stack.reserve(50);
  872. }
  873. //
  874. // See if we've seen this recursion before at this location, if we have then
  875. // we need to prevent infinite recursion:
  876. //
  877. for(typename std::vector<recursion_info<results_type> >::reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
  878. {
  879. if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
  880. {
  881. if(i->location_of_start == position)
  882. return false;
  883. break;
  884. }
  885. }
  886. //
  887. // Now get on with it:
  888. //
  889. recursion_stack.push_back(recursion_info<results_type>());
  890. recursion_stack.back().preturn_address = pstate->next.p;
  891. recursion_stack.back().results = *m_presult;
  892. recursion_stack.back().repeater_stack = next_count;
  893. recursion_stack.back().location_of_start = position;
  894. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  895. recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
  896. repeater_count<BidiIterator>* saved = next_count;
  897. repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
  898. next_count = &r;
  899. bool can_backtrack = m_can_backtrack;
  900. bool result = match_all_states();
  901. m_can_backtrack = can_backtrack;
  902. next_count = saved;
  903. if(!result)
  904. {
  905. next_count = recursion_stack.back().repeater_stack;
  906. *m_presult = recursion_stack.back().results;
  907. recursion_stack.pop_back();
  908. return false;
  909. }
  910. return true;
  911. }
  912. template <class BidiIterator, class Allocator, class traits>
  913. bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
  914. {
  915. int index = static_cast<const re_brace*>(pstate)->index;
  916. icase = static_cast<const re_brace*>(pstate)->icase;
  917. if(index > 0)
  918. {
  919. if((m_match_flags & match_nosubs) == 0)
  920. {
  921. m_presult->set_second(position, index);
  922. }
  923. if(!recursion_stack.empty())
  924. {
  925. if(index == recursion_stack.back().idx)
  926. {
  927. recursion_info<results_type> saved = recursion_stack.back();
  928. recursion_stack.pop_back();
  929. pstate = saved.preturn_address;
  930. repeater_count<BidiIterator>* saved_count = next_count;
  931. next_count = saved.repeater_stack;
  932. *m_presult = saved.results;
  933. if(!match_all_states())
  934. {
  935. recursion_stack.push_back(saved);
  936. next_count = saved_count;
  937. return false;
  938. }
  939. }
  940. }
  941. }
  942. else if((index < 0) && (index != -4))
  943. {
  944. // matched forward lookahead:
  945. pstate = 0;
  946. return true;
  947. }
  948. pstate = pstate ? pstate->next.p : 0;
  949. return true;
  950. }
  951. template <class BidiIterator, class Allocator, class traits>
  952. bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
  953. {
  954. if(!recursion_stack.empty())
  955. {
  956. BOOST_ASSERT(0 == recursion_stack.back().idx);
  957. const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
  958. *m_presult = recursion_stack.back().results;
  959. recursion_stack.pop_back();
  960. if(!match_all_states())
  961. {
  962. recursion_stack.push_back(recursion_info<results_type>());
  963. recursion_stack.back().preturn_address = saved_state;
  964. recursion_stack.back().results = *m_presult;
  965. recursion_stack.back().location_of_start = position;
  966. return false;
  967. }
  968. return true;
  969. }
  970. if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
  971. return false;
  972. if((m_match_flags & match_all) && (position != last))
  973. return false;
  974. if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
  975. return false;
  976. m_presult->set_second(position);
  977. pstate = 0;
  978. m_has_found_match = true;
  979. if((m_match_flags & match_posix) == match_posix)
  980. {
  981. m_result.maybe_assign(*m_presult);
  982. if((m_match_flags & match_any) == 0)
  983. return false;
  984. }
  985. #ifdef BOOST_REGEX_MATCH_EXTRA
  986. if(match_extra & m_match_flags)
  987. {
  988. for(unsigned i = 0; i < m_presult->size(); ++i)
  989. if((*m_presult)[i].matched)
  990. ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
  991. }
  992. #endif
  993. return true;
  994. }
  995. template <class BidiIterator, class Allocator, class traits>
  996. bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
  997. {
  998. m_can_backtrack = false;
  999. int action = static_cast<const re_commit*>(pstate)->action;
  1000. switch(action)
  1001. {
  1002. case commit_commit:
  1003. restart = last;
  1004. break;
  1005. case commit_skip:
  1006. restart = position;
  1007. break;
  1008. }
  1009. pstate = pstate->next.p;
  1010. return true;
  1011. }
  1012. template <class BidiIterator, class Allocator, class traits>
  1013. bool perl_matcher<BidiIterator, Allocator, traits>::match_then()
  1014. {
  1015. pstate = pstate->next.p;
  1016. if(match_all_states())
  1017. return true;
  1018. m_can_backtrack = false;
  1019. m_have_then = true;
  1020. return false;
  1021. }
  1022. template <class BidiIterator, class Allocator, class traits>
  1023. bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
  1024. {
  1025. // change our case sensitivity:
  1026. bool oldcase = this->icase;
  1027. this->icase = static_cast<const re_case*>(pstate)->icase;
  1028. pstate = pstate->next.p;
  1029. bool result = match_all_states();
  1030. this->icase = oldcase;
  1031. return result;
  1032. }
  1033. template <class BidiIterator, class Allocator, class traits>
  1034. bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool have_match)
  1035. {
  1036. while(pstate)
  1037. {
  1038. if(pstate->type == syntax_element_endmark)
  1039. {
  1040. if(static_cast<const re_brace*>(pstate)->index == index)
  1041. {
  1042. if(have_match)
  1043. return this->match_endmark();
  1044. pstate = pstate->next.p;
  1045. return true;
  1046. }
  1047. else
  1048. {
  1049. // Unenclosed closing ), occurs when (*ACCEPT) is inside some other
  1050. // parenthesis which may or may not have other side effects associated with it.
  1051. bool r = match_endmark();
  1052. m_have_accept = true;
  1053. if(!pstate)
  1054. return r;
  1055. }
  1056. continue;
  1057. }
  1058. else if(pstate->type == syntax_element_match)
  1059. return true;
  1060. else if(pstate->type == syntax_element_startmark)
  1061. {
  1062. int idx = static_cast<const re_brace*>(pstate)->index;
  1063. pstate = pstate->next.p;
  1064. skip_until_paren(idx, false);
  1065. continue;
  1066. }
  1067. pstate = pstate->next.p;
  1068. }
  1069. return true;
  1070. }
  1071. } // namespace BOOST_REGEX_DETAIL_NS
  1072. } // namespace boost
  1073. #ifdef BOOST_MSVC
  1074. #pragma warning(pop)
  1075. #endif
  1076. #ifdef BOOST_MSVC
  1077. #pragma warning(push)
  1078. #pragma warning(disable: 4103)
  1079. #endif
  1080. #ifdef BOOST_HAS_ABI_HEADERS
  1081. # include BOOST_ABI_SUFFIX
  1082. #endif
  1083. #ifdef BOOST_MSVC
  1084. #pragma warning(pop)
  1085. #endif
  1086. #endif