basic_regex_parser.hpp 110 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE basic_regex_parser.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares template class basic_regex_parser.
  16. */
  17. #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  18. #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. namespace boost{
  30. namespace BOOST_REGEX_DETAIL_NS{
  31. #ifdef BOOST_MSVC
  32. #pragma warning(push)
  33. #pragma warning(disable:4244)
  34. #if BOOST_MSVC < 1910
  35. #pragma warning(disable:4800)
  36. #endif
  37. #endif
  38. inline boost::intmax_t umax(mpl::false_ const&)
  39. {
  40. // Get out clause here, just in case numeric_limits is unspecialized:
  41. return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
  42. }
  43. inline boost::intmax_t umax(mpl::true_ const&)
  44. {
  45. return (std::numeric_limits<std::size_t>::max)();
  46. }
  47. inline boost::intmax_t umax()
  48. {
  49. return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
  50. }
  51. template <class charT, class traits>
  52. class basic_regex_parser : public basic_regex_creator<charT, traits>
  53. {
  54. public:
  55. basic_regex_parser(regex_data<charT, traits>* data);
  56. void parse(const charT* p1, const charT* p2, unsigned flags);
  57. void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
  58. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
  59. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
  60. {
  61. fail(error_code, position, message, position);
  62. }
  63. bool parse_all();
  64. bool parse_basic();
  65. bool parse_extended();
  66. bool parse_literal();
  67. bool parse_open_paren();
  68. bool parse_basic_escape();
  69. bool parse_extended_escape();
  70. bool parse_match_any();
  71. bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
  72. bool parse_repeat_range(bool isbasic);
  73. bool parse_alt();
  74. bool parse_set();
  75. bool parse_backref();
  76. void parse_set_literal(basic_char_set<charT, traits>& char_set);
  77. bool parse_inner_set(basic_char_set<charT, traits>& char_set);
  78. bool parse_QE();
  79. bool parse_perl_extension();
  80. bool parse_perl_verb();
  81. bool match_verb(const char*);
  82. bool add_emacs_code(bool negate);
  83. bool unwind_alts(std::ptrdiff_t last_paren_start);
  84. digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
  85. charT unescape_character();
  86. regex_constants::syntax_option_type parse_options();
  87. private:
  88. typedef bool (basic_regex_parser::*parser_proc_type)();
  89. typedef typename traits::string_type string_type;
  90. typedef typename traits::char_class_type char_class_type;
  91. parser_proc_type m_parser_proc; // the main parser to use
  92. const charT* m_base; // the start of the string being parsed
  93. const charT* m_end; // the end of the string being parsed
  94. const charT* m_position; // our current parser position
  95. unsigned m_mark_count; // how many sub-expressions we have
  96. int m_mark_reset; // used to indicate that we're inside a (?|...) block.
  97. unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
  98. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
  99. std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
  100. bool m_has_case_change; // true if somewhere in the current block the case has changed
  101. unsigned m_recursion_count; // How many times we've called parse_all.
  102. #if defined(BOOST_MSVC) && defined(_M_IX86)
  103. // This is an ugly warning suppression workaround (for warnings *inside* std::vector
  104. // that can not otherwise be suppressed)...
  105. BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
  106. std::vector<long> m_alt_jumps; // list of alternative in the current scope.
  107. #else
  108. std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
  109. #endif
  110. basic_regex_parser& operator=(const basic_regex_parser&);
  111. basic_regex_parser(const basic_regex_parser&);
  112. };
  113. template <class charT, class traits>
  114. basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
  115. : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
  116. {
  117. }
  118. template <class charT, class traits>
  119. void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
  120. {
  121. // pass l_flags on to base class:
  122. this->init(l_flags);
  123. // set up pointers:
  124. m_position = m_base = p1;
  125. m_end = p2;
  126. // empty strings are errors:
  127. if((p1 == p2) &&
  128. (
  129. ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
  130. || (l_flags & regbase::no_empty_expressions)
  131. )
  132. )
  133. {
  134. fail(regex_constants::error_empty, 0);
  135. return;
  136. }
  137. // select which parser to use:
  138. switch(l_flags & regbase::main_option_type)
  139. {
  140. case regbase::perl_syntax_group:
  141. {
  142. m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
  143. //
  144. // Add a leading paren with index zero to give recursions a target:
  145. //
  146. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  147. br->index = 0;
  148. br->icase = this->flags() & regbase::icase;
  149. break;
  150. }
  151. case regbase::basic_syntax_group:
  152. m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
  153. break;
  154. case regbase::literal:
  155. m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
  156. break;
  157. default:
  158. // Ooops, someone has managed to set more than one of the main option flags,
  159. // so this must be an error:
  160. fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
  161. return;
  162. }
  163. // parse all our characters:
  164. bool result = parse_all();
  165. //
  166. // Unwind our alternatives:
  167. //
  168. unwind_alts(-1);
  169. // reset l_flags as a global scope (?imsx) may have altered them:
  170. this->flags(l_flags);
  171. // if we haven't gobbled up all the characters then we must
  172. // have had an unexpected ')' :
  173. if(!result)
  174. {
  175. fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
  176. return;
  177. }
  178. // if an error has been set then give up now:
  179. if(this->m_pdata->m_status)
  180. return;
  181. // fill in our sub-expression count:
  182. this->m_pdata->m_mark_count = 1 + m_mark_count;
  183. this->finalize(p1, p2);
  184. }
  185. template <class charT, class traits>
  186. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
  187. {
  188. // get the error message:
  189. std::string message = this->m_pdata->m_ptraits->error_string(error_code);
  190. fail(error_code, position, message);
  191. }
  192. template <class charT, class traits>
  193. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
  194. {
  195. if(0 == this->m_pdata->m_status) // update the error code if not already set
  196. this->m_pdata->m_status = error_code;
  197. m_position = m_end; // don't bother parsing anything else
  198. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  199. //
  200. // Augment error message with the regular expression text:
  201. //
  202. if(start_pos == position)
  203. start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
  204. std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
  205. if(error_code != regex_constants::error_empty)
  206. {
  207. if((start_pos != 0) || (end_pos != (m_end - m_base)))
  208. message += " The error occurred while parsing the regular expression fragment: '";
  209. else
  210. message += " The error occurred while parsing the regular expression: '";
  211. if(start_pos != end_pos)
  212. {
  213. message += std::string(m_base + start_pos, m_base + position);
  214. message += ">>>HERE>>>";
  215. message += std::string(m_base + position, m_base + end_pos);
  216. }
  217. message += "'.";
  218. }
  219. #endif
  220. #ifndef BOOST_NO_EXCEPTIONS
  221. if(0 == (this->flags() & regex_constants::no_except))
  222. {
  223. boost::regex_error e(message, error_code, position);
  224. e.raise();
  225. }
  226. #else
  227. (void)position; // suppress warnings.
  228. #endif
  229. }
  230. template <class charT, class traits>
  231. bool basic_regex_parser<charT, traits>::parse_all()
  232. {
  233. if (++m_recursion_count > 400)
  234. {
  235. // exceeded internal limits
  236. fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
  237. }
  238. bool result = true;
  239. while(result && (m_position != m_end))
  240. {
  241. result = (this->*m_parser_proc)();
  242. }
  243. --m_recursion_count;
  244. return result;
  245. }
  246. #ifdef BOOST_MSVC
  247. #pragma warning(push)
  248. #pragma warning(disable:4702)
  249. #endif
  250. template <class charT, class traits>
  251. bool basic_regex_parser<charT, traits>::parse_basic()
  252. {
  253. switch(this->m_traits.syntax_type(*m_position))
  254. {
  255. case regex_constants::syntax_escape:
  256. return parse_basic_escape();
  257. case regex_constants::syntax_dot:
  258. return parse_match_any();
  259. case regex_constants::syntax_caret:
  260. ++m_position;
  261. this->append_state(syntax_element_start_line);
  262. break;
  263. case regex_constants::syntax_dollar:
  264. ++m_position;
  265. this->append_state(syntax_element_end_line);
  266. break;
  267. case regex_constants::syntax_star:
  268. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
  269. return parse_literal();
  270. else
  271. {
  272. ++m_position;
  273. return parse_repeat();
  274. }
  275. case regex_constants::syntax_plus:
  276. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  277. return parse_literal();
  278. else
  279. {
  280. ++m_position;
  281. return parse_repeat(1);
  282. }
  283. case regex_constants::syntax_question:
  284. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  285. return parse_literal();
  286. else
  287. {
  288. ++m_position;
  289. return parse_repeat(0, 1);
  290. }
  291. case regex_constants::syntax_open_set:
  292. return parse_set();
  293. case regex_constants::syntax_newline:
  294. if(this->flags() & regbase::newline_alt)
  295. return parse_alt();
  296. else
  297. return parse_literal();
  298. default:
  299. return parse_literal();
  300. }
  301. return true;
  302. }
  303. template <class charT, class traits>
  304. bool basic_regex_parser<charT, traits>::parse_extended()
  305. {
  306. bool result = true;
  307. switch(this->m_traits.syntax_type(*m_position))
  308. {
  309. case regex_constants::syntax_open_mark:
  310. return parse_open_paren();
  311. case regex_constants::syntax_close_mark:
  312. return false;
  313. case regex_constants::syntax_escape:
  314. return parse_extended_escape();
  315. case regex_constants::syntax_dot:
  316. return parse_match_any();
  317. case regex_constants::syntax_caret:
  318. ++m_position;
  319. this->append_state(
  320. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
  321. break;
  322. case regex_constants::syntax_dollar:
  323. ++m_position;
  324. this->append_state(
  325. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
  326. break;
  327. case regex_constants::syntax_star:
  328. if(m_position == this->m_base)
  329. {
  330. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
  331. return false;
  332. }
  333. ++m_position;
  334. return parse_repeat();
  335. case regex_constants::syntax_question:
  336. if(m_position == this->m_base)
  337. {
  338. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
  339. return false;
  340. }
  341. ++m_position;
  342. return parse_repeat(0,1);
  343. case regex_constants::syntax_plus:
  344. if(m_position == this->m_base)
  345. {
  346. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
  347. return false;
  348. }
  349. ++m_position;
  350. return parse_repeat(1);
  351. case regex_constants::syntax_open_brace:
  352. ++m_position;
  353. return parse_repeat_range(false);
  354. case regex_constants::syntax_close_brace:
  355. if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
  356. {
  357. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  358. return false;
  359. }
  360. result = parse_literal();
  361. break;
  362. case regex_constants::syntax_or:
  363. return parse_alt();
  364. case regex_constants::syntax_open_set:
  365. return parse_set();
  366. case regex_constants::syntax_newline:
  367. if(this->flags() & regbase::newline_alt)
  368. return parse_alt();
  369. else
  370. return parse_literal();
  371. case regex_constants::syntax_hash:
  372. //
  373. // If we have a mod_x flag set, then skip until
  374. // we get to a newline character:
  375. //
  376. if((this->flags()
  377. & (regbase::no_perl_ex|regbase::mod_x))
  378. == regbase::mod_x)
  379. {
  380. while((m_position != m_end) && !is_separator(*m_position++)){}
  381. return true;
  382. }
  383. BOOST_FALLTHROUGH;
  384. default:
  385. result = parse_literal();
  386. break;
  387. }
  388. return result;
  389. }
  390. #ifdef BOOST_MSVC
  391. #pragma warning(pop)
  392. #endif
  393. template <class charT, class traits>
  394. bool basic_regex_parser<charT, traits>::parse_literal()
  395. {
  396. // append this as a literal provided it's not a space character
  397. // or the perl option regbase::mod_x is not set:
  398. if(
  399. ((this->flags()
  400. & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
  401. != regbase::mod_x)
  402. || !this->m_traits.isctype(*m_position, this->m_mask_space))
  403. this->append_literal(*m_position);
  404. ++m_position;
  405. return true;
  406. }
  407. template <class charT, class traits>
  408. bool basic_regex_parser<charT, traits>::parse_open_paren()
  409. {
  410. //
  411. // skip the '(' and error check:
  412. //
  413. if(++m_position == m_end)
  414. {
  415. fail(regex_constants::error_paren, m_position - m_base);
  416. return false;
  417. }
  418. //
  419. // begin by checking for a perl-style (?...) extension:
  420. //
  421. if(
  422. ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
  423. || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  424. )
  425. {
  426. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  427. return parse_perl_extension();
  428. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
  429. return parse_perl_verb();
  430. }
  431. //
  432. // update our mark count, and append the required state:
  433. //
  434. unsigned markid = 0;
  435. if(0 == (this->flags() & regbase::nosubs))
  436. {
  437. markid = ++m_mark_count;
  438. #ifndef BOOST_NO_STD_DISTANCE
  439. if(this->flags() & regbase::save_subexpression_location)
  440. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
  441. #else
  442. if(this->flags() & regbase::save_subexpression_location)
  443. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
  444. #endif
  445. }
  446. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  447. pb->index = markid;
  448. pb->icase = this->flags() & regbase::icase;
  449. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  450. // back up insertion point for alternations, and set new point:
  451. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  452. this->m_pdata->m_data.align();
  453. m_alt_insert_point = this->m_pdata->m_data.size();
  454. //
  455. // back up the current flags in case we have a nested (?imsx) group:
  456. //
  457. regex_constants::syntax_option_type opts = this->flags();
  458. bool old_case_change = m_has_case_change;
  459. m_has_case_change = false; // no changes to this scope as yet...
  460. //
  461. // Back up branch reset data in case we have a nested (?|...)
  462. //
  463. int mark_reset = m_mark_reset;
  464. m_mark_reset = -1;
  465. //
  466. // now recursively add more states, this will terminate when we get to a
  467. // matching ')' :
  468. //
  469. parse_all();
  470. //
  471. // Unwind pushed alternatives:
  472. //
  473. if(0 == unwind_alts(last_paren_start))
  474. return false;
  475. //
  476. // restore flags:
  477. //
  478. if(m_has_case_change)
  479. {
  480. // the case has changed in one or more of the alternatives
  481. // within the scoped (...) block: we have to add a state
  482. // to reset the case sensitivity:
  483. static_cast<re_case*>(
  484. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  485. )->icase = opts & regbase::icase;
  486. }
  487. this->flags(opts);
  488. m_has_case_change = old_case_change;
  489. //
  490. // restore branch reset:
  491. //
  492. m_mark_reset = mark_reset;
  493. //
  494. // we either have a ')' or we have run out of characters prematurely:
  495. //
  496. if(m_position == m_end)
  497. {
  498. this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
  499. return false;
  500. }
  501. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  502. return false;
  503. #ifndef BOOST_NO_STD_DISTANCE
  504. if(markid && (this->flags() & regbase::save_subexpression_location))
  505. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
  506. #else
  507. if(markid && (this->flags() & regbase::save_subexpression_location))
  508. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
  509. #endif
  510. ++m_position;
  511. //
  512. // append closing parenthesis state:
  513. //
  514. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  515. pb->index = markid;
  516. pb->icase = this->flags() & regbase::icase;
  517. this->m_paren_start = last_paren_start;
  518. //
  519. // restore the alternate insertion point:
  520. //
  521. this->m_alt_insert_point = last_alt_point;
  522. //
  523. // allow backrefs to this mark:
  524. //
  525. if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
  526. this->m_backrefs |= 1u << (markid - 1);
  527. return true;
  528. }
  529. template <class charT, class traits>
  530. bool basic_regex_parser<charT, traits>::parse_basic_escape()
  531. {
  532. if(++m_position == m_end)
  533. {
  534. fail(regex_constants::error_paren, m_position - m_base);
  535. return false;
  536. }
  537. bool result = true;
  538. switch(this->m_traits.escape_syntax_type(*m_position))
  539. {
  540. case regex_constants::syntax_open_mark:
  541. return parse_open_paren();
  542. case regex_constants::syntax_close_mark:
  543. return false;
  544. case regex_constants::syntax_plus:
  545. if(this->flags() & regex_constants::bk_plus_qm)
  546. {
  547. ++m_position;
  548. return parse_repeat(1);
  549. }
  550. else
  551. return parse_literal();
  552. case regex_constants::syntax_question:
  553. if(this->flags() & regex_constants::bk_plus_qm)
  554. {
  555. ++m_position;
  556. return parse_repeat(0, 1);
  557. }
  558. else
  559. return parse_literal();
  560. case regex_constants::syntax_open_brace:
  561. if(this->flags() & regbase::no_intervals)
  562. return parse_literal();
  563. ++m_position;
  564. return parse_repeat_range(true);
  565. case regex_constants::syntax_close_brace:
  566. if(this->flags() & regbase::no_intervals)
  567. return parse_literal();
  568. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  569. return false;
  570. case regex_constants::syntax_or:
  571. if(this->flags() & regbase::bk_vbar)
  572. return parse_alt();
  573. else
  574. result = parse_literal();
  575. break;
  576. case regex_constants::syntax_digit:
  577. return parse_backref();
  578. case regex_constants::escape_type_start_buffer:
  579. if(this->flags() & regbase::emacs_ex)
  580. {
  581. ++m_position;
  582. this->append_state(syntax_element_buffer_start);
  583. }
  584. else
  585. result = parse_literal();
  586. break;
  587. case regex_constants::escape_type_end_buffer:
  588. if(this->flags() & regbase::emacs_ex)
  589. {
  590. ++m_position;
  591. this->append_state(syntax_element_buffer_end);
  592. }
  593. else
  594. result = parse_literal();
  595. break;
  596. case regex_constants::escape_type_word_assert:
  597. if(this->flags() & regbase::emacs_ex)
  598. {
  599. ++m_position;
  600. this->append_state(syntax_element_word_boundary);
  601. }
  602. else
  603. result = parse_literal();
  604. break;
  605. case regex_constants::escape_type_not_word_assert:
  606. if(this->flags() & regbase::emacs_ex)
  607. {
  608. ++m_position;
  609. this->append_state(syntax_element_within_word);
  610. }
  611. else
  612. result = parse_literal();
  613. break;
  614. case regex_constants::escape_type_left_word:
  615. if(this->flags() & regbase::emacs_ex)
  616. {
  617. ++m_position;
  618. this->append_state(syntax_element_word_start);
  619. }
  620. else
  621. result = parse_literal();
  622. break;
  623. case regex_constants::escape_type_right_word:
  624. if(this->flags() & regbase::emacs_ex)
  625. {
  626. ++m_position;
  627. this->append_state(syntax_element_word_end);
  628. }
  629. else
  630. result = parse_literal();
  631. break;
  632. default:
  633. if(this->flags() & regbase::emacs_ex)
  634. {
  635. bool negate = true;
  636. switch(*m_position)
  637. {
  638. case 'w':
  639. negate = false;
  640. BOOST_FALLTHROUGH;
  641. case 'W':
  642. {
  643. basic_char_set<charT, traits> char_set;
  644. if(negate)
  645. char_set.negate();
  646. char_set.add_class(this->m_word_mask);
  647. if(0 == this->append_set(char_set))
  648. {
  649. fail(regex_constants::error_ctype, m_position - m_base);
  650. return false;
  651. }
  652. ++m_position;
  653. return true;
  654. }
  655. case 's':
  656. negate = false;
  657. BOOST_FALLTHROUGH;
  658. case 'S':
  659. return add_emacs_code(negate);
  660. case 'c':
  661. case 'C':
  662. // not supported yet:
  663. fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
  664. return false;
  665. default:
  666. break;
  667. }
  668. }
  669. result = parse_literal();
  670. break;
  671. }
  672. return result;
  673. }
  674. template <class charT, class traits>
  675. bool basic_regex_parser<charT, traits>::parse_extended_escape()
  676. {
  677. ++m_position;
  678. if(m_position == m_end)
  679. {
  680. fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
  681. return false;
  682. }
  683. bool negate = false; // in case this is a character class escape: \w \d etc
  684. switch(this->m_traits.escape_syntax_type(*m_position))
  685. {
  686. case regex_constants::escape_type_not_class:
  687. negate = true;
  688. BOOST_FALLTHROUGH;
  689. case regex_constants::escape_type_class:
  690. {
  691. escape_type_class_jump:
  692. typedef typename traits::char_class_type m_type;
  693. m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  694. if(m != 0)
  695. {
  696. basic_char_set<charT, traits> char_set;
  697. if(negate)
  698. char_set.negate();
  699. char_set.add_class(m);
  700. if(0 == this->append_set(char_set))
  701. {
  702. fail(regex_constants::error_ctype, m_position - m_base);
  703. return false;
  704. }
  705. ++m_position;
  706. return true;
  707. }
  708. //
  709. // not a class, just a regular unknown escape:
  710. //
  711. this->append_literal(unescape_character());
  712. break;
  713. }
  714. case regex_constants::syntax_digit:
  715. return parse_backref();
  716. case regex_constants::escape_type_left_word:
  717. ++m_position;
  718. this->append_state(syntax_element_word_start);
  719. break;
  720. case regex_constants::escape_type_right_word:
  721. ++m_position;
  722. this->append_state(syntax_element_word_end);
  723. break;
  724. case regex_constants::escape_type_start_buffer:
  725. ++m_position;
  726. this->append_state(syntax_element_buffer_start);
  727. break;
  728. case regex_constants::escape_type_end_buffer:
  729. ++m_position;
  730. this->append_state(syntax_element_buffer_end);
  731. break;
  732. case regex_constants::escape_type_word_assert:
  733. ++m_position;
  734. this->append_state(syntax_element_word_boundary);
  735. break;
  736. case regex_constants::escape_type_not_word_assert:
  737. ++m_position;
  738. this->append_state(syntax_element_within_word);
  739. break;
  740. case regex_constants::escape_type_Z:
  741. ++m_position;
  742. this->append_state(syntax_element_soft_buffer_end);
  743. break;
  744. case regex_constants::escape_type_Q:
  745. return parse_QE();
  746. case regex_constants::escape_type_C:
  747. return parse_match_any();
  748. case regex_constants::escape_type_X:
  749. ++m_position;
  750. this->append_state(syntax_element_combining);
  751. break;
  752. case regex_constants::escape_type_G:
  753. ++m_position;
  754. this->append_state(syntax_element_restart_continue);
  755. break;
  756. case regex_constants::escape_type_not_property:
  757. negate = true;
  758. BOOST_FALLTHROUGH;
  759. case regex_constants::escape_type_property:
  760. {
  761. ++m_position;
  762. char_class_type m;
  763. if(m_position == m_end)
  764. {
  765. fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
  766. return false;
  767. }
  768. // maybe have \p{ddd}
  769. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  770. {
  771. const charT* base = m_position;
  772. // skip forward until we find enclosing brace:
  773. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  774. ++m_position;
  775. if(m_position == m_end)
  776. {
  777. fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
  778. return false;
  779. }
  780. m = this->m_traits.lookup_classname(++base, m_position++);
  781. }
  782. else
  783. {
  784. m = this->m_traits.lookup_classname(m_position, m_position+1);
  785. ++m_position;
  786. }
  787. if(m != 0)
  788. {
  789. basic_char_set<charT, traits> char_set;
  790. if(negate)
  791. char_set.negate();
  792. char_set.add_class(m);
  793. if(0 == this->append_set(char_set))
  794. {
  795. fail(regex_constants::error_ctype, m_position - m_base);
  796. return false;
  797. }
  798. return true;
  799. }
  800. fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
  801. return false;
  802. }
  803. case regex_constants::escape_type_reset_start_mark:
  804. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  805. {
  806. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  807. pb->index = -5;
  808. pb->icase = this->flags() & regbase::icase;
  809. this->m_pdata->m_data.align();
  810. ++m_position;
  811. return true;
  812. }
  813. goto escape_type_class_jump;
  814. case regex_constants::escape_type_line_ending:
  815. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  816. {
  817. const charT* e = get_escape_R_string<charT>();
  818. const charT* old_position = m_position;
  819. const charT* old_end = m_end;
  820. const charT* old_base = m_base;
  821. m_position = e;
  822. m_base = e;
  823. m_end = e + traits::length(e);
  824. bool r = parse_all();
  825. m_position = ++old_position;
  826. m_end = old_end;
  827. m_base = old_base;
  828. return r;
  829. }
  830. goto escape_type_class_jump;
  831. case regex_constants::escape_type_extended_backref:
  832. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  833. {
  834. bool have_brace = false;
  835. bool negative = false;
  836. static const char* incomplete_message = "Incomplete \\g escape found.";
  837. if(++m_position == m_end)
  838. {
  839. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  840. return false;
  841. }
  842. // maybe have \g{ddd}
  843. regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
  844. regex_constants::syntax_type syn_end = 0;
  845. if((syn == regex_constants::syntax_open_brace)
  846. || (syn == regex_constants::escape_type_left_word)
  847. || (syn == regex_constants::escape_type_end_buffer))
  848. {
  849. if(++m_position == m_end)
  850. {
  851. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  852. return false;
  853. }
  854. have_brace = true;
  855. switch(syn)
  856. {
  857. case regex_constants::syntax_open_brace:
  858. syn_end = regex_constants::syntax_close_brace;
  859. break;
  860. case regex_constants::escape_type_left_word:
  861. syn_end = regex_constants::escape_type_right_word;
  862. break;
  863. default:
  864. syn_end = regex_constants::escape_type_end_buffer;
  865. break;
  866. }
  867. }
  868. negative = (*m_position == static_cast<charT>('-'));
  869. if((negative) && (++m_position == m_end))
  870. {
  871. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  872. return false;
  873. }
  874. const charT* pc = m_position;
  875. boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
  876. if((i < 0) && syn_end)
  877. {
  878. // Check for a named capture, get the leftmost one if there is more than one:
  879. const charT* base = m_position;
  880. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
  881. {
  882. ++m_position;
  883. }
  884. i = hash_value_from_capture_name(base, m_position);
  885. pc = m_position;
  886. }
  887. if(negative)
  888. i = 1 + m_mark_count - i;
  889. if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
  890. {
  891. m_position = pc;
  892. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  893. pb->index = i;
  894. pb->icase = this->flags() & regbase::icase;
  895. }
  896. else
  897. {
  898. fail(regex_constants::error_backref, m_position - m_base);
  899. return false;
  900. }
  901. m_position = pc;
  902. if(have_brace)
  903. {
  904. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
  905. {
  906. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  907. return false;
  908. }
  909. ++m_position;
  910. }
  911. return true;
  912. }
  913. goto escape_type_class_jump;
  914. case regex_constants::escape_type_control_v:
  915. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  916. goto escape_type_class_jump;
  917. BOOST_FALLTHROUGH;
  918. default:
  919. this->append_literal(unescape_character());
  920. break;
  921. }
  922. return true;
  923. }
  924. template <class charT, class traits>
  925. bool basic_regex_parser<charT, traits>::parse_match_any()
  926. {
  927. //
  928. // we have a '.' that can match any character:
  929. //
  930. ++m_position;
  931. static_cast<re_dot*>(
  932. this->append_state(syntax_element_wild, sizeof(re_dot))
  933. )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
  934. ? BOOST_REGEX_DETAIL_NS::force_not_newline
  935. : this->flags() & regbase::mod_s ?
  936. BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
  937. return true;
  938. }
  939. template <class charT, class traits>
  940. bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
  941. {
  942. bool greedy = true;
  943. bool pocessive = false;
  944. std::size_t insert_point;
  945. //
  946. // when we get to here we may have a non-greedy ? mark still to come:
  947. //
  948. if((m_position != m_end)
  949. && (
  950. (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  951. || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
  952. )
  953. )
  954. {
  955. // OK we have a perl or emacs regex, check for a '?':
  956. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  957. {
  958. // whitespace skip:
  959. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  960. ++m_position;
  961. }
  962. if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
  963. {
  964. greedy = false;
  965. ++m_position;
  966. }
  967. // for perl regexes only check for pocessive ++ repeats.
  968. if((m_position != m_end)
  969. && (0 == (this->flags() & regbase::main_option_type))
  970. && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
  971. {
  972. pocessive = true;
  973. ++m_position;
  974. }
  975. }
  976. if(0 == this->m_last_state)
  977. {
  978. fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
  979. return false;
  980. }
  981. if(this->m_last_state->type == syntax_element_endmark)
  982. {
  983. // insert a repeat before the '(' matching the last ')':
  984. insert_point = this->m_paren_start;
  985. }
  986. else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
  987. {
  988. // the last state was a literal with more than one character, split it in two:
  989. re_literal* lit = static_cast<re_literal*>(this->m_last_state);
  990. charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
  991. lit->length -= 1;
  992. // now append new state:
  993. lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
  994. lit->length = 1;
  995. (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
  996. insert_point = this->getoffset(this->m_last_state);
  997. }
  998. else
  999. {
  1000. // repeat the last state whatever it was, need to add some error checking here:
  1001. switch(this->m_last_state->type)
  1002. {
  1003. case syntax_element_start_line:
  1004. case syntax_element_end_line:
  1005. case syntax_element_word_boundary:
  1006. case syntax_element_within_word:
  1007. case syntax_element_word_start:
  1008. case syntax_element_word_end:
  1009. case syntax_element_buffer_start:
  1010. case syntax_element_buffer_end:
  1011. case syntax_element_alt:
  1012. case syntax_element_soft_buffer_end:
  1013. case syntax_element_restart_continue:
  1014. case syntax_element_jump:
  1015. case syntax_element_startmark:
  1016. case syntax_element_backstep:
  1017. // can't legally repeat any of the above:
  1018. fail(regex_constants::error_badrepeat, m_position - m_base);
  1019. return false;
  1020. default:
  1021. // do nothing...
  1022. break;
  1023. }
  1024. insert_point = this->getoffset(this->m_last_state);
  1025. }
  1026. //
  1027. // OK we now know what to repeat, so insert the repeat around it:
  1028. //
  1029. re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
  1030. rep->min = low;
  1031. rep->max = high;
  1032. rep->greedy = greedy;
  1033. rep->leading = false;
  1034. // store our repeater position for later:
  1035. std::ptrdiff_t rep_off = this->getoffset(rep);
  1036. // and append a back jump to the repeat:
  1037. re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1038. jmp->alt.i = rep_off - this->getoffset(jmp);
  1039. this->m_pdata->m_data.align();
  1040. // now fill in the alt jump for the repeat:
  1041. rep = static_cast<re_repeat*>(this->getaddress(rep_off));
  1042. rep->alt.i = this->m_pdata->m_data.size() - rep_off;
  1043. //
  1044. // If the repeat is pocessive then bracket the repeat with a (?>...)
  1045. // independent sub-expression construct:
  1046. //
  1047. if(pocessive)
  1048. {
  1049. if(m_position != m_end)
  1050. {
  1051. //
  1052. // Check for illegal following quantifier, we have to do this here, because
  1053. // the extra states we insert below circumvents our usual error checking :-(
  1054. //
  1055. bool contin = false;
  1056. do
  1057. {
  1058. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  1059. {
  1060. // whitespace skip:
  1061. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1062. ++m_position;
  1063. }
  1064. if (m_position != m_end)
  1065. {
  1066. switch (this->m_traits.syntax_type(*m_position))
  1067. {
  1068. case regex_constants::syntax_star:
  1069. case regex_constants::syntax_plus:
  1070. case regex_constants::syntax_question:
  1071. case regex_constants::syntax_open_brace:
  1072. fail(regex_constants::error_badrepeat, m_position - m_base);
  1073. return false;
  1074. case regex_constants::syntax_open_mark:
  1075. // Do we have a comment? If so we need to skip it here...
  1076. if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
  1077. && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
  1078. {
  1079. while ((m_position != m_end)
  1080. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
  1081. }
  1082. contin = true;
  1083. }
  1084. else
  1085. contin = false;
  1086. }
  1087. }
  1088. else
  1089. contin = false;
  1090. } while (contin);
  1091. }
  1092. re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
  1093. pb->index = -3;
  1094. pb->icase = this->flags() & regbase::icase;
  1095. jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
  1096. this->m_pdata->m_data.align();
  1097. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  1098. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  1099. pb->index = -3;
  1100. pb->icase = this->flags() & regbase::icase;
  1101. }
  1102. return true;
  1103. }
  1104. template <class charT, class traits>
  1105. bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
  1106. {
  1107. static const char* incomplete_message = "Missing } in quantified repetition.";
  1108. //
  1109. // parse a repeat-range:
  1110. //
  1111. std::size_t min, max;
  1112. boost::intmax_t v;
  1113. // skip whitespace:
  1114. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1115. ++m_position;
  1116. if(this->m_position == this->m_end)
  1117. {
  1118. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1119. {
  1120. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1121. return false;
  1122. }
  1123. // Treat the opening '{' as a literal character, rewind to start of error:
  1124. --m_position;
  1125. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1126. return parse_literal();
  1127. }
  1128. // get min:
  1129. v = this->m_traits.toi(m_position, m_end, 10);
  1130. // skip whitespace:
  1131. if((v < 0) || (v > umax()))
  1132. {
  1133. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1134. {
  1135. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1136. return false;
  1137. }
  1138. // Treat the opening '{' as a literal character, rewind to start of error:
  1139. --m_position;
  1140. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1141. return parse_literal();
  1142. }
  1143. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1144. ++m_position;
  1145. if(this->m_position == this->m_end)
  1146. {
  1147. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1148. {
  1149. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1150. return false;
  1151. }
  1152. // Treat the opening '{' as a literal character, rewind to start of error:
  1153. --m_position;
  1154. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1155. return parse_literal();
  1156. }
  1157. min = static_cast<std::size_t>(v);
  1158. // see if we have a comma:
  1159. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
  1160. {
  1161. // move on and error check:
  1162. ++m_position;
  1163. // skip whitespace:
  1164. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1165. ++m_position;
  1166. if(this->m_position == this->m_end)
  1167. {
  1168. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1169. {
  1170. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1171. return false;
  1172. }
  1173. // Treat the opening '{' as a literal character, rewind to start of error:
  1174. --m_position;
  1175. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1176. return parse_literal();
  1177. }
  1178. // get the value if any:
  1179. v = this->m_traits.toi(m_position, m_end, 10);
  1180. max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
  1181. }
  1182. else
  1183. {
  1184. // no comma, max = min:
  1185. max = min;
  1186. }
  1187. // skip whitespace:
  1188. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1189. ++m_position;
  1190. // OK now check trailing }:
  1191. if(this->m_position == this->m_end)
  1192. {
  1193. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1194. {
  1195. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1196. return false;
  1197. }
  1198. // Treat the opening '{' as a literal character, rewind to start of error:
  1199. --m_position;
  1200. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1201. return parse_literal();
  1202. }
  1203. if(isbasic)
  1204. {
  1205. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
  1206. {
  1207. ++m_position;
  1208. if(this->m_position == this->m_end)
  1209. {
  1210. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1211. return false;
  1212. }
  1213. }
  1214. else
  1215. {
  1216. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1217. return false;
  1218. }
  1219. }
  1220. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
  1221. ++m_position;
  1222. else
  1223. {
  1224. // Treat the opening '{' as a literal character, rewind to start of error:
  1225. --m_position;
  1226. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1227. return parse_literal();
  1228. }
  1229. //
  1230. // finally go and add the repeat, unless error:
  1231. //
  1232. if(min > max)
  1233. {
  1234. // Backtrack to error location:
  1235. m_position -= 2;
  1236. while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
  1237. ++m_position;
  1238. fail(regex_constants::error_badbrace, m_position - m_base);
  1239. return false;
  1240. }
  1241. return parse_repeat(min, max);
  1242. }
  1243. template <class charT, class traits>
  1244. bool basic_regex_parser<charT, traits>::parse_alt()
  1245. {
  1246. //
  1247. // error check: if there have been no previous states,
  1248. // or if the last state was a '(' then error:
  1249. //
  1250. if(
  1251. ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
  1252. &&
  1253. !(
  1254. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  1255. &&
  1256. ((this->flags() & regbase::no_empty_expressions) == 0)
  1257. )
  1258. )
  1259. {
  1260. fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
  1261. return false;
  1262. }
  1263. //
  1264. // Reset mark count if required:
  1265. //
  1266. if(m_max_mark < m_mark_count)
  1267. m_max_mark = m_mark_count;
  1268. if(m_mark_reset >= 0)
  1269. m_mark_count = m_mark_reset;
  1270. ++m_position;
  1271. //
  1272. // we need to append a trailing jump:
  1273. //
  1274. re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
  1275. std::ptrdiff_t jump_offset = this->getoffset(pj);
  1276. //
  1277. // now insert the alternative:
  1278. //
  1279. re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
  1280. jump_offset += re_alt_size;
  1281. this->m_pdata->m_data.align();
  1282. palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
  1283. //
  1284. // update m_alt_insert_point so that the next alternate gets
  1285. // inserted at the start of the second of the two we've just created:
  1286. //
  1287. this->m_alt_insert_point = this->m_pdata->m_data.size();
  1288. //
  1289. // the start of this alternative must have a case changes state
  1290. // if the current block has messed around with case changes:
  1291. //
  1292. if(m_has_case_change)
  1293. {
  1294. static_cast<re_case*>(
  1295. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1296. )->icase = this->m_icase;
  1297. }
  1298. //
  1299. // push the alternative onto our stack, a recursive
  1300. // implementation here is easier to understand (and faster
  1301. // as it happens), but causes all kinds of stack overflow problems
  1302. // on programs with small stacks (COM+).
  1303. //
  1304. m_alt_jumps.push_back(jump_offset);
  1305. return true;
  1306. }
  1307. template <class charT, class traits>
  1308. bool basic_regex_parser<charT, traits>::parse_set()
  1309. {
  1310. static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1311. ++m_position;
  1312. if(m_position == m_end)
  1313. {
  1314. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1315. return false;
  1316. }
  1317. basic_char_set<charT, traits> char_set;
  1318. const charT* base = m_position; // where the '[' was
  1319. const charT* item_base = m_position; // where the '[' or '^' was
  1320. while(m_position != m_end)
  1321. {
  1322. switch(this->m_traits.syntax_type(*m_position))
  1323. {
  1324. case regex_constants::syntax_caret:
  1325. if(m_position == base)
  1326. {
  1327. char_set.negate();
  1328. ++m_position;
  1329. item_base = m_position;
  1330. }
  1331. else
  1332. parse_set_literal(char_set);
  1333. break;
  1334. case regex_constants::syntax_close_set:
  1335. if(m_position == item_base)
  1336. {
  1337. parse_set_literal(char_set);
  1338. break;
  1339. }
  1340. else
  1341. {
  1342. ++m_position;
  1343. if(0 == this->append_set(char_set))
  1344. {
  1345. fail(regex_constants::error_ctype, m_position - m_base);
  1346. return false;
  1347. }
  1348. }
  1349. return true;
  1350. case regex_constants::syntax_open_set:
  1351. if(parse_inner_set(char_set))
  1352. break;
  1353. return true;
  1354. case regex_constants::syntax_escape:
  1355. {
  1356. //
  1357. // look ahead and see if this is a character class shortcut
  1358. // \d \w \s etc...
  1359. //
  1360. ++m_position;
  1361. if(this->m_traits.escape_syntax_type(*m_position)
  1362. == regex_constants::escape_type_class)
  1363. {
  1364. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1365. if(m != 0)
  1366. {
  1367. char_set.add_class(m);
  1368. ++m_position;
  1369. break;
  1370. }
  1371. }
  1372. else if(this->m_traits.escape_syntax_type(*m_position)
  1373. == regex_constants::escape_type_not_class)
  1374. {
  1375. // negated character class:
  1376. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1377. if(m != 0)
  1378. {
  1379. char_set.add_negated_class(m);
  1380. ++m_position;
  1381. break;
  1382. }
  1383. }
  1384. // not a character class, just a regular escape:
  1385. --m_position;
  1386. parse_set_literal(char_set);
  1387. break;
  1388. }
  1389. default:
  1390. parse_set_literal(char_set);
  1391. break;
  1392. }
  1393. }
  1394. return m_position != m_end;
  1395. }
  1396. template <class charT, class traits>
  1397. bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
  1398. {
  1399. static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1400. //
  1401. // we have either a character class [:name:]
  1402. // a collating element [.name.]
  1403. // or an equivalence class [=name=]
  1404. //
  1405. if(m_end == ++m_position)
  1406. {
  1407. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1408. return false;
  1409. }
  1410. switch(this->m_traits.syntax_type(*m_position))
  1411. {
  1412. case regex_constants::syntax_dot:
  1413. //
  1414. // a collating element is treated as a literal:
  1415. //
  1416. --m_position;
  1417. parse_set_literal(char_set);
  1418. return true;
  1419. case regex_constants::syntax_colon:
  1420. {
  1421. // check that character classes are actually enabled:
  1422. if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
  1423. == (regbase::basic_syntax_group | regbase::no_char_classes))
  1424. {
  1425. --m_position;
  1426. parse_set_literal(char_set);
  1427. return true;
  1428. }
  1429. // skip the ':'
  1430. if(m_end == ++m_position)
  1431. {
  1432. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1433. return false;
  1434. }
  1435. const charT* name_first = m_position;
  1436. // skip at least one character, then find the matching ':]'
  1437. if(m_end == ++m_position)
  1438. {
  1439. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1440. return false;
  1441. }
  1442. while((m_position != m_end)
  1443. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
  1444. ++m_position;
  1445. const charT* name_last = m_position;
  1446. if(m_end == m_position)
  1447. {
  1448. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1449. return false;
  1450. }
  1451. if((m_end == ++m_position)
  1452. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1453. {
  1454. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1455. return false;
  1456. }
  1457. //
  1458. // check for negated class:
  1459. //
  1460. bool negated = false;
  1461. if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
  1462. {
  1463. ++name_first;
  1464. negated = true;
  1465. }
  1466. typedef typename traits::char_class_type m_type;
  1467. m_type m = this->m_traits.lookup_classname(name_first, name_last);
  1468. if(m == 0)
  1469. {
  1470. if(char_set.empty() && (name_last - name_first == 1))
  1471. {
  1472. // maybe a special case:
  1473. ++m_position;
  1474. if( (m_position != m_end)
  1475. && (this->m_traits.syntax_type(*m_position)
  1476. == regex_constants::syntax_close_set))
  1477. {
  1478. if(this->m_traits.escape_syntax_type(*name_first)
  1479. == regex_constants::escape_type_left_word)
  1480. {
  1481. ++m_position;
  1482. this->append_state(syntax_element_word_start);
  1483. return false;
  1484. }
  1485. if(this->m_traits.escape_syntax_type(*name_first)
  1486. == regex_constants::escape_type_right_word)
  1487. {
  1488. ++m_position;
  1489. this->append_state(syntax_element_word_end);
  1490. return false;
  1491. }
  1492. }
  1493. }
  1494. fail(regex_constants::error_ctype, name_first - m_base);
  1495. return false;
  1496. }
  1497. if(negated == false)
  1498. char_set.add_class(m);
  1499. else
  1500. char_set.add_negated_class(m);
  1501. ++m_position;
  1502. break;
  1503. }
  1504. case regex_constants::syntax_equal:
  1505. {
  1506. // skip the '='
  1507. if(m_end == ++m_position)
  1508. {
  1509. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1510. return false;
  1511. }
  1512. const charT* name_first = m_position;
  1513. // skip at least one character, then find the matching '=]'
  1514. if(m_end == ++m_position)
  1515. {
  1516. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1517. return false;
  1518. }
  1519. while((m_position != m_end)
  1520. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
  1521. ++m_position;
  1522. const charT* name_last = m_position;
  1523. if(m_end == m_position)
  1524. {
  1525. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1526. return false;
  1527. }
  1528. if((m_end == ++m_position)
  1529. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1530. {
  1531. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1532. return false;
  1533. }
  1534. string_type m = this->m_traits.lookup_collatename(name_first, name_last);
  1535. if((0 == m.size()) || (m.size() > 2))
  1536. {
  1537. fail(regex_constants::error_collate, name_first - m_base);
  1538. return false;
  1539. }
  1540. digraph<charT> d;
  1541. d.first = m[0];
  1542. if(m.size() > 1)
  1543. d.second = m[1];
  1544. else
  1545. d.second = 0;
  1546. char_set.add_equivalent(d);
  1547. ++m_position;
  1548. break;
  1549. }
  1550. default:
  1551. --m_position;
  1552. parse_set_literal(char_set);
  1553. break;
  1554. }
  1555. return true;
  1556. }
  1557. template <class charT, class traits>
  1558. void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
  1559. {
  1560. digraph<charT> start_range(get_next_set_literal(char_set));
  1561. if(m_end == m_position)
  1562. {
  1563. fail(regex_constants::error_brack, m_position - m_base);
  1564. return;
  1565. }
  1566. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1567. {
  1568. // we have a range:
  1569. if(m_end == ++m_position)
  1570. {
  1571. fail(regex_constants::error_brack, m_position - m_base);
  1572. return;
  1573. }
  1574. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
  1575. {
  1576. digraph<charT> end_range = get_next_set_literal(char_set);
  1577. char_set.add_range(start_range, end_range);
  1578. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1579. {
  1580. if(m_end == ++m_position)
  1581. {
  1582. fail(regex_constants::error_brack, m_position - m_base);
  1583. return;
  1584. }
  1585. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
  1586. {
  1587. // trailing - :
  1588. --m_position;
  1589. return;
  1590. }
  1591. fail(regex_constants::error_range, m_position - m_base);
  1592. return;
  1593. }
  1594. return;
  1595. }
  1596. --m_position;
  1597. }
  1598. char_set.add_single(start_range);
  1599. }
  1600. template <class charT, class traits>
  1601. digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
  1602. {
  1603. digraph<charT> result;
  1604. switch(this->m_traits.syntax_type(*m_position))
  1605. {
  1606. case regex_constants::syntax_dash:
  1607. if(!char_set.empty())
  1608. {
  1609. // see if we are at the end of the set:
  1610. if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1611. {
  1612. fail(regex_constants::error_range, m_position - m_base);
  1613. return result;
  1614. }
  1615. --m_position;
  1616. }
  1617. result.first = *m_position++;
  1618. return result;
  1619. case regex_constants::syntax_escape:
  1620. // check to see if escapes are supported first:
  1621. if(this->flags() & regex_constants::no_escape_in_lists)
  1622. {
  1623. result = *m_position++;
  1624. break;
  1625. }
  1626. ++m_position;
  1627. result = unescape_character();
  1628. break;
  1629. case regex_constants::syntax_open_set:
  1630. {
  1631. if(m_end == ++m_position)
  1632. {
  1633. fail(regex_constants::error_collate, m_position - m_base);
  1634. return result;
  1635. }
  1636. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
  1637. {
  1638. --m_position;
  1639. result.first = *m_position;
  1640. ++m_position;
  1641. return result;
  1642. }
  1643. if(m_end == ++m_position)
  1644. {
  1645. fail(regex_constants::error_collate, m_position - m_base);
  1646. return result;
  1647. }
  1648. const charT* name_first = m_position;
  1649. // skip at least one character, then find the matching ':]'
  1650. if(m_end == ++m_position)
  1651. {
  1652. fail(regex_constants::error_collate, name_first - m_base);
  1653. return result;
  1654. }
  1655. while((m_position != m_end)
  1656. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
  1657. ++m_position;
  1658. const charT* name_last = m_position;
  1659. if(m_end == m_position)
  1660. {
  1661. fail(regex_constants::error_collate, name_first - m_base);
  1662. return result;
  1663. }
  1664. if((m_end == ++m_position)
  1665. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1666. {
  1667. fail(regex_constants::error_collate, name_first - m_base);
  1668. return result;
  1669. }
  1670. ++m_position;
  1671. string_type s = this->m_traits.lookup_collatename(name_first, name_last);
  1672. if(s.empty() || (s.size() > 2))
  1673. {
  1674. fail(regex_constants::error_collate, name_first - m_base);
  1675. return result;
  1676. }
  1677. result.first = s[0];
  1678. if(s.size() > 1)
  1679. result.second = s[1];
  1680. else
  1681. result.second = 0;
  1682. return result;
  1683. }
  1684. default:
  1685. result = *m_position++;
  1686. }
  1687. return result;
  1688. }
  1689. //
  1690. // does a value fit in the specified charT type?
  1691. //
  1692. template <class charT>
  1693. bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
  1694. {
  1695. return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
  1696. }
  1697. template <class charT>
  1698. bool valid_value(charT, boost::intmax_t, const mpl::false_&)
  1699. {
  1700. return true; // v will alsways fit in a charT
  1701. }
  1702. template <class charT>
  1703. bool valid_value(charT c, boost::intmax_t v)
  1704. {
  1705. return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
  1706. }
  1707. template <class charT, class traits>
  1708. charT basic_regex_parser<charT, traits>::unescape_character()
  1709. {
  1710. #ifdef BOOST_MSVC
  1711. #pragma warning(push)
  1712. #pragma warning(disable:4127)
  1713. #endif
  1714. charT result(0);
  1715. if(m_position == m_end)
  1716. {
  1717. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
  1718. return false;
  1719. }
  1720. switch(this->m_traits.escape_syntax_type(*m_position))
  1721. {
  1722. case regex_constants::escape_type_control_a:
  1723. result = charT('\a');
  1724. break;
  1725. case regex_constants::escape_type_e:
  1726. result = charT(27);
  1727. break;
  1728. case regex_constants::escape_type_control_f:
  1729. result = charT('\f');
  1730. break;
  1731. case regex_constants::escape_type_control_n:
  1732. result = charT('\n');
  1733. break;
  1734. case regex_constants::escape_type_control_r:
  1735. result = charT('\r');
  1736. break;
  1737. case regex_constants::escape_type_control_t:
  1738. result = charT('\t');
  1739. break;
  1740. case regex_constants::escape_type_control_v:
  1741. result = charT('\v');
  1742. break;
  1743. case regex_constants::escape_type_word_assert:
  1744. result = charT('\b');
  1745. break;
  1746. case regex_constants::escape_type_ascii_control:
  1747. ++m_position;
  1748. if(m_position == m_end)
  1749. {
  1750. // Rewind to start of escape:
  1751. --m_position;
  1752. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1753. fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
  1754. return result;
  1755. }
  1756. result = static_cast<charT>(*m_position % 32);
  1757. break;
  1758. case regex_constants::escape_type_hex:
  1759. ++m_position;
  1760. if(m_position == m_end)
  1761. {
  1762. // Rewind to start of escape:
  1763. --m_position;
  1764. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1765. fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
  1766. return result;
  1767. }
  1768. // maybe have \x{ddd}
  1769. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1770. {
  1771. ++m_position;
  1772. if(m_position == m_end)
  1773. {
  1774. // Rewind to start of escape:
  1775. --m_position;
  1776. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1777. fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
  1778. return result;
  1779. }
  1780. boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
  1781. if((m_position == m_end)
  1782. || (i < 0)
  1783. || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
  1784. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1785. {
  1786. // Rewind to start of escape:
  1787. --m_position;
  1788. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1789. fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
  1790. return result;
  1791. }
  1792. ++m_position;
  1793. result = charT(i);
  1794. }
  1795. else
  1796. {
  1797. std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
  1798. boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
  1799. if((i < 0)
  1800. || !valid_value(charT(0), i))
  1801. {
  1802. // Rewind to start of escape:
  1803. --m_position;
  1804. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1805. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
  1806. return result;
  1807. }
  1808. result = charT(i);
  1809. }
  1810. return result;
  1811. case regex_constants::syntax_digit:
  1812. {
  1813. // an octal escape sequence, the first character must be a zero
  1814. // followed by up to 3 octal digits:
  1815. std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
  1816. const charT* bp = m_position;
  1817. boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
  1818. if(val != 0)
  1819. {
  1820. // Rewind to start of escape:
  1821. --m_position;
  1822. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1823. // Oops not an octal escape after all:
  1824. fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
  1825. return result;
  1826. }
  1827. val = this->m_traits.toi(m_position, m_position + len, 8);
  1828. if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
  1829. {
  1830. // Rewind to start of escape:
  1831. --m_position;
  1832. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1833. fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
  1834. return result;
  1835. }
  1836. return static_cast<charT>(val);
  1837. }
  1838. case regex_constants::escape_type_named_char:
  1839. {
  1840. ++m_position;
  1841. if(m_position == m_end)
  1842. {
  1843. // Rewind to start of escape:
  1844. --m_position;
  1845. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1846. fail(regex_constants::error_escape, m_position - m_base);
  1847. return false;
  1848. }
  1849. // maybe have \N{name}
  1850. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1851. {
  1852. const charT* base = m_position;
  1853. // skip forward until we find enclosing brace:
  1854. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1855. ++m_position;
  1856. if(m_position == m_end)
  1857. {
  1858. // Rewind to start of escape:
  1859. --m_position;
  1860. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1861. fail(regex_constants::error_escape, m_position - m_base);
  1862. return false;
  1863. }
  1864. string_type s = this->m_traits.lookup_collatename(++base, m_position++);
  1865. if(s.empty())
  1866. {
  1867. // Rewind to start of escape:
  1868. --m_position;
  1869. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1870. fail(regex_constants::error_collate, m_position - m_base);
  1871. return false;
  1872. }
  1873. if(s.size() == 1)
  1874. {
  1875. return s[0];
  1876. }
  1877. }
  1878. // fall through is a failure:
  1879. // Rewind to start of escape:
  1880. --m_position;
  1881. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1882. fail(regex_constants::error_escape, m_position - m_base);
  1883. return false;
  1884. }
  1885. default:
  1886. result = *m_position;
  1887. break;
  1888. }
  1889. ++m_position;
  1890. return result;
  1891. #ifdef BOOST_MSVC
  1892. #pragma warning(pop)
  1893. #endif
  1894. }
  1895. template <class charT, class traits>
  1896. bool basic_regex_parser<charT, traits>::parse_backref()
  1897. {
  1898. BOOST_ASSERT(m_position != m_end);
  1899. const charT* pc = m_position;
  1900. boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
  1901. if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
  1902. {
  1903. // not a backref at all but an octal escape sequence:
  1904. charT c = unescape_character();
  1905. this->append_literal(c);
  1906. }
  1907. else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
  1908. {
  1909. m_position = pc;
  1910. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  1911. pb->index = i;
  1912. pb->icase = this->flags() & regbase::icase;
  1913. }
  1914. else
  1915. {
  1916. // Rewind to start of escape:
  1917. --m_position;
  1918. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1919. fail(regex_constants::error_backref, m_position - m_base);
  1920. return false;
  1921. }
  1922. return true;
  1923. }
  1924. template <class charT, class traits>
  1925. bool basic_regex_parser<charT, traits>::parse_QE()
  1926. {
  1927. #ifdef BOOST_MSVC
  1928. #pragma warning(push)
  1929. #pragma warning(disable:4127)
  1930. #endif
  1931. //
  1932. // parse a \Q...\E sequence:
  1933. //
  1934. ++m_position; // skip the Q
  1935. const charT* start = m_position;
  1936. const charT* end;
  1937. do
  1938. {
  1939. while((m_position != m_end)
  1940. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
  1941. ++m_position;
  1942. if(m_position == m_end)
  1943. {
  1944. // a \Q...\E sequence may terminate with the end of the expression:
  1945. end = m_position;
  1946. break;
  1947. }
  1948. if(++m_position == m_end) // skip the escape
  1949. {
  1950. fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
  1951. return false;
  1952. }
  1953. // check to see if it's a \E:
  1954. if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
  1955. {
  1956. ++m_position;
  1957. end = m_position - 2;
  1958. break;
  1959. }
  1960. // otherwise go round again:
  1961. }while(true);
  1962. //
  1963. // now add all the character between the two escapes as literals:
  1964. //
  1965. while(start != end)
  1966. {
  1967. this->append_literal(*start);
  1968. ++start;
  1969. }
  1970. return true;
  1971. #ifdef BOOST_MSVC
  1972. #pragma warning(pop)
  1973. #endif
  1974. }
  1975. template <class charT, class traits>
  1976. bool basic_regex_parser<charT, traits>::parse_perl_extension()
  1977. {
  1978. if(++m_position == m_end)
  1979. {
  1980. // Rewind to start of (? sequence:
  1981. --m_position;
  1982. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  1983. fail(regex_constants::error_perl_extension, m_position - m_base);
  1984. return false;
  1985. }
  1986. //
  1987. // treat comments as a special case, as these
  1988. // are the only ones that don't start with a leading
  1989. // startmark state:
  1990. //
  1991. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
  1992. {
  1993. while((m_position != m_end)
  1994. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
  1995. {}
  1996. return true;
  1997. }
  1998. //
  1999. // backup some state, and prepare the way:
  2000. //
  2001. int markid = 0;
  2002. std::ptrdiff_t jump_offset = 0;
  2003. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  2004. pb->icase = this->flags() & regbase::icase;
  2005. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  2006. // back up insertion point for alternations, and set new point:
  2007. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  2008. this->m_pdata->m_data.align();
  2009. m_alt_insert_point = this->m_pdata->m_data.size();
  2010. std::ptrdiff_t expected_alt_point = m_alt_insert_point;
  2011. bool restore_flags = true;
  2012. regex_constants::syntax_option_type old_flags = this->flags();
  2013. bool old_case_change = m_has_case_change;
  2014. m_has_case_change = false;
  2015. charT name_delim;
  2016. int mark_reset = m_mark_reset;
  2017. int max_mark = m_max_mark;
  2018. m_mark_reset = -1;
  2019. m_max_mark = m_mark_count;
  2020. boost::intmax_t v;
  2021. //
  2022. // select the actual extension used:
  2023. //
  2024. switch(this->m_traits.syntax_type(*m_position))
  2025. {
  2026. case regex_constants::syntax_or:
  2027. m_mark_reset = m_mark_count;
  2028. BOOST_FALLTHROUGH;
  2029. case regex_constants::syntax_colon:
  2030. //
  2031. // a non-capturing mark:
  2032. //
  2033. pb->index = markid = 0;
  2034. ++m_position;
  2035. break;
  2036. case regex_constants::syntax_digit:
  2037. {
  2038. //
  2039. // a recursive subexpression:
  2040. //
  2041. v = this->m_traits.toi(m_position, m_end, 10);
  2042. if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2043. {
  2044. // Rewind to start of (? sequence:
  2045. --m_position;
  2046. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2047. fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
  2048. return false;
  2049. }
  2050. insert_recursion:
  2051. pb->index = markid = 0;
  2052. re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
  2053. pr->alt.i = v;
  2054. pr->state_id = 0;
  2055. static_cast<re_case*>(
  2056. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2057. )->icase = this->flags() & regbase::icase;
  2058. break;
  2059. }
  2060. case regex_constants::syntax_plus:
  2061. //
  2062. // A forward-relative recursive subexpression:
  2063. //
  2064. ++m_position;
  2065. v = this->m_traits.toi(m_position, m_end, 10);
  2066. if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2067. {
  2068. // Rewind to start of (? sequence:
  2069. --m_position;
  2070. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2071. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2072. return false;
  2073. }
  2074. if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
  2075. {
  2076. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2077. return false;
  2078. }
  2079. v += m_mark_count;
  2080. goto insert_recursion;
  2081. case regex_constants::syntax_dash:
  2082. //
  2083. // Possibly a backward-relative recursive subexpression:
  2084. //
  2085. ++m_position;
  2086. v = this->m_traits.toi(m_position, m_end, 10);
  2087. if(v <= 0)
  2088. {
  2089. --m_position;
  2090. // Oops not a relative recursion at all, but a (?-imsx) group:
  2091. goto option_group_jump;
  2092. }
  2093. v = m_mark_count + 1 - v;
  2094. if(v <= 0)
  2095. {
  2096. // Rewind to start of (? sequence:
  2097. --m_position;
  2098. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2099. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2100. return false;
  2101. }
  2102. goto insert_recursion;
  2103. case regex_constants::syntax_equal:
  2104. pb->index = markid = -1;
  2105. ++m_position;
  2106. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2107. this->m_pdata->m_data.align();
  2108. m_alt_insert_point = this->m_pdata->m_data.size();
  2109. break;
  2110. case regex_constants::syntax_not:
  2111. pb->index = markid = -2;
  2112. ++m_position;
  2113. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2114. this->m_pdata->m_data.align();
  2115. m_alt_insert_point = this->m_pdata->m_data.size();
  2116. break;
  2117. case regex_constants::escape_type_left_word:
  2118. {
  2119. // a lookbehind assertion:
  2120. if(++m_position == m_end)
  2121. {
  2122. // Rewind to start of (? sequence:
  2123. --m_position;
  2124. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2125. fail(regex_constants::error_perl_extension, m_position - m_base);
  2126. return false;
  2127. }
  2128. regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
  2129. if(t == regex_constants::syntax_not)
  2130. pb->index = markid = -2;
  2131. else if(t == regex_constants::syntax_equal)
  2132. pb->index = markid = -1;
  2133. else
  2134. {
  2135. // Probably a named capture which also starts (?< :
  2136. name_delim = '>';
  2137. --m_position;
  2138. goto named_capture_jump;
  2139. }
  2140. ++m_position;
  2141. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2142. this->append_state(syntax_element_backstep, sizeof(re_brace));
  2143. this->m_pdata->m_data.align();
  2144. m_alt_insert_point = this->m_pdata->m_data.size();
  2145. break;
  2146. }
  2147. case regex_constants::escape_type_right_word:
  2148. //
  2149. // an independent sub-expression:
  2150. //
  2151. pb->index = markid = -3;
  2152. ++m_position;
  2153. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2154. this->m_pdata->m_data.align();
  2155. m_alt_insert_point = this->m_pdata->m_data.size();
  2156. break;
  2157. case regex_constants::syntax_open_mark:
  2158. {
  2159. // a conditional expression:
  2160. pb->index = markid = -4;
  2161. if(++m_position == m_end)
  2162. {
  2163. // Rewind to start of (? sequence:
  2164. --m_position;
  2165. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2166. fail(regex_constants::error_perl_extension, m_position - m_base);
  2167. return false;
  2168. }
  2169. v = this->m_traits.toi(m_position, m_end, 10);
  2170. if(m_position == m_end)
  2171. {
  2172. // Rewind to start of (? sequence:
  2173. --m_position;
  2174. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2175. fail(regex_constants::error_perl_extension, m_position - m_base);
  2176. return false;
  2177. }
  2178. if(*m_position == charT('R'))
  2179. {
  2180. if(++m_position == m_end)
  2181. {
  2182. // Rewind to start of (? sequence:
  2183. --m_position;
  2184. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2185. fail(regex_constants::error_perl_extension, m_position - m_base);
  2186. return false;
  2187. }
  2188. if(*m_position == charT('&'))
  2189. {
  2190. const charT* base = ++m_position;
  2191. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2192. ++m_position;
  2193. if(m_position == m_end)
  2194. {
  2195. // Rewind to start of (? sequence:
  2196. --m_position;
  2197. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2198. fail(regex_constants::error_perl_extension, m_position - m_base);
  2199. return false;
  2200. }
  2201. v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
  2202. }
  2203. else
  2204. {
  2205. v = -this->m_traits.toi(m_position, m_end, 10);
  2206. }
  2207. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2208. br->index = v < 0 ? (v - 1) : 0;
  2209. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2210. {
  2211. // Rewind to start of (? sequence:
  2212. --m_position;
  2213. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2214. fail(regex_constants::error_perl_extension, m_position - m_base);
  2215. return false;
  2216. }
  2217. if(++m_position == m_end)
  2218. {
  2219. // Rewind to start of (? sequence:
  2220. --m_position;
  2221. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2222. fail(regex_constants::error_perl_extension, m_position - m_base);
  2223. return false;
  2224. }
  2225. }
  2226. else if((*m_position == charT('\'')) || (*m_position == charT('<')))
  2227. {
  2228. const charT* base = ++m_position;
  2229. while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
  2230. ++m_position;
  2231. if(m_position == m_end)
  2232. {
  2233. // Rewind to start of (? sequence:
  2234. --m_position;
  2235. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2236. fail(regex_constants::error_perl_extension, m_position - m_base);
  2237. return false;
  2238. }
  2239. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2240. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2241. br->index = v;
  2242. if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
  2243. {
  2244. // Rewind to start of (? sequence:
  2245. --m_position;
  2246. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2247. fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
  2248. return false;
  2249. }
  2250. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2251. {
  2252. // Rewind to start of (? sequence:
  2253. --m_position;
  2254. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2255. fail(regex_constants::error_perl_extension, m_position - m_base);
  2256. return false;
  2257. }
  2258. if(++m_position == m_end)
  2259. {
  2260. // Rewind to start of (? sequence:
  2261. --m_position;
  2262. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2263. fail(regex_constants::error_perl_extension, m_position - m_base);
  2264. return false;
  2265. }
  2266. }
  2267. else if(*m_position == charT('D'))
  2268. {
  2269. const char* def = "DEFINE";
  2270. while(*def && (m_position != m_end) && (*m_position == charT(*def)))
  2271. ++m_position, ++def;
  2272. if((m_position == m_end) || *def)
  2273. {
  2274. // Rewind to start of (? sequence:
  2275. --m_position;
  2276. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2277. fail(regex_constants::error_perl_extension, m_position - m_base);
  2278. return false;
  2279. }
  2280. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2281. br->index = 9999; // special magic value!
  2282. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2283. {
  2284. // Rewind to start of (? sequence:
  2285. --m_position;
  2286. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2287. fail(regex_constants::error_perl_extension, m_position - m_base);
  2288. return false;
  2289. }
  2290. if(++m_position == m_end)
  2291. {
  2292. // Rewind to start of (? sequence:
  2293. --m_position;
  2294. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2295. fail(regex_constants::error_perl_extension, m_position - m_base);
  2296. return false;
  2297. }
  2298. }
  2299. else if(v > 0)
  2300. {
  2301. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2302. br->index = v;
  2303. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2304. {
  2305. // Rewind to start of (? sequence:
  2306. --m_position;
  2307. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2308. fail(regex_constants::error_perl_extension, m_position - m_base);
  2309. return false;
  2310. }
  2311. if(++m_position == m_end)
  2312. {
  2313. // Rewind to start of (? sequence:
  2314. --m_position;
  2315. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2316. fail(regex_constants::error_perl_extension, m_position - m_base);
  2317. return false;
  2318. }
  2319. }
  2320. else
  2321. {
  2322. // verify that we have a lookahead or lookbehind assert:
  2323. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
  2324. {
  2325. // Rewind to start of (? sequence:
  2326. --m_position;
  2327. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2328. fail(regex_constants::error_perl_extension, m_position - m_base);
  2329. return false;
  2330. }
  2331. if(++m_position == m_end)
  2332. {
  2333. // Rewind to start of (? sequence:
  2334. --m_position;
  2335. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2336. fail(regex_constants::error_perl_extension, m_position - m_base);
  2337. return false;
  2338. }
  2339. if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
  2340. {
  2341. if(++m_position == m_end)
  2342. {
  2343. // Rewind to start of (? sequence:
  2344. --m_position;
  2345. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2346. fail(regex_constants::error_perl_extension, m_position - m_base);
  2347. return false;
  2348. }
  2349. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2350. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2351. {
  2352. // Rewind to start of (? sequence:
  2353. --m_position;
  2354. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2355. fail(regex_constants::error_perl_extension, m_position - m_base);
  2356. return false;
  2357. }
  2358. m_position -= 3;
  2359. }
  2360. else
  2361. {
  2362. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2363. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2364. {
  2365. // Rewind to start of (? sequence:
  2366. --m_position;
  2367. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2368. fail(regex_constants::error_perl_extension, m_position - m_base);
  2369. return false;
  2370. }
  2371. m_position -= 2;
  2372. }
  2373. }
  2374. break;
  2375. }
  2376. case regex_constants::syntax_close_mark:
  2377. // Rewind to start of (? sequence:
  2378. --m_position;
  2379. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2380. fail(regex_constants::error_perl_extension, m_position - m_base);
  2381. return false;
  2382. case regex_constants::escape_type_end_buffer:
  2383. {
  2384. name_delim = *m_position;
  2385. named_capture_jump:
  2386. markid = 0;
  2387. if(0 == (this->flags() & regbase::nosubs))
  2388. {
  2389. markid = ++m_mark_count;
  2390. #ifndef BOOST_NO_STD_DISTANCE
  2391. if(this->flags() & regbase::save_subexpression_location)
  2392. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
  2393. #else
  2394. if(this->flags() & regbase::save_subexpression_location)
  2395. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
  2396. #endif
  2397. }
  2398. pb->index = markid;
  2399. const charT* base = ++m_position;
  2400. if(m_position == m_end)
  2401. {
  2402. // Rewind to start of (? sequence:
  2403. --m_position;
  2404. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2405. fail(regex_constants::error_perl_extension, m_position - m_base);
  2406. return false;
  2407. }
  2408. while((m_position != m_end) && (*m_position != name_delim))
  2409. ++m_position;
  2410. if(m_position == m_end)
  2411. {
  2412. // Rewind to start of (? sequence:
  2413. --m_position;
  2414. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2415. fail(regex_constants::error_perl_extension, m_position - m_base);
  2416. return false;
  2417. }
  2418. this->m_pdata->set_name(base, m_position, markid);
  2419. ++m_position;
  2420. break;
  2421. }
  2422. default:
  2423. if(*m_position == charT('R'))
  2424. {
  2425. ++m_position;
  2426. v = 0;
  2427. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2428. {
  2429. // Rewind to start of (? sequence:
  2430. --m_position;
  2431. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2432. fail(regex_constants::error_perl_extension, m_position - m_base);
  2433. return false;
  2434. }
  2435. goto insert_recursion;
  2436. }
  2437. if(*m_position == charT('&'))
  2438. {
  2439. ++m_position;
  2440. const charT* base = m_position;
  2441. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2442. ++m_position;
  2443. if(m_position == m_end)
  2444. {
  2445. // Rewind to start of (? sequence:
  2446. --m_position;
  2447. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2448. fail(regex_constants::error_perl_extension, m_position - m_base);
  2449. return false;
  2450. }
  2451. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2452. goto insert_recursion;
  2453. }
  2454. if(*m_position == charT('P'))
  2455. {
  2456. ++m_position;
  2457. if(m_position == m_end)
  2458. {
  2459. // Rewind to start of (? sequence:
  2460. --m_position;
  2461. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2462. fail(regex_constants::error_perl_extension, m_position - m_base);
  2463. return false;
  2464. }
  2465. if(*m_position == charT('>'))
  2466. {
  2467. ++m_position;
  2468. const charT* base = m_position;
  2469. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2470. ++m_position;
  2471. if(m_position == m_end)
  2472. {
  2473. // Rewind to start of (? sequence:
  2474. --m_position;
  2475. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2476. fail(regex_constants::error_perl_extension, m_position - m_base);
  2477. return false;
  2478. }
  2479. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2480. goto insert_recursion;
  2481. }
  2482. }
  2483. //
  2484. // lets assume that we have a (?imsx) group and try and parse it:
  2485. //
  2486. option_group_jump:
  2487. regex_constants::syntax_option_type opts = parse_options();
  2488. if(m_position == m_end)
  2489. {
  2490. // Rewind to start of (? sequence:
  2491. --m_position;
  2492. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2493. fail(regex_constants::error_perl_extension, m_position - m_base);
  2494. return false;
  2495. }
  2496. // make a note of whether we have a case change:
  2497. m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
  2498. pb->index = markid = 0;
  2499. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
  2500. {
  2501. // update flags and carry on as normal:
  2502. this->flags(opts);
  2503. restore_flags = false;
  2504. old_case_change |= m_has_case_change; // defer end of scope by one ')'
  2505. }
  2506. else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
  2507. {
  2508. // update flags and carry on until the matching ')' is found:
  2509. this->flags(opts);
  2510. ++m_position;
  2511. }
  2512. else
  2513. {
  2514. // Rewind to start of (? sequence:
  2515. --m_position;
  2516. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2517. fail(regex_constants::error_perl_extension, m_position - m_base);
  2518. return false;
  2519. }
  2520. // finally append a case change state if we need it:
  2521. if(m_has_case_change)
  2522. {
  2523. static_cast<re_case*>(
  2524. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2525. )->icase = opts & regbase::icase;
  2526. }
  2527. }
  2528. //
  2529. // now recursively add more states, this will terminate when we get to a
  2530. // matching ')' :
  2531. //
  2532. parse_all();
  2533. //
  2534. // Unwind alternatives:
  2535. //
  2536. if(0 == unwind_alts(last_paren_start))
  2537. {
  2538. // Rewind to start of (? sequence:
  2539. --m_position;
  2540. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2541. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
  2542. return false;
  2543. }
  2544. //
  2545. // we either have a ')' or we have run out of characters prematurely:
  2546. //
  2547. if(m_position == m_end)
  2548. {
  2549. // Rewind to start of (? sequence:
  2550. --m_position;
  2551. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2552. this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
  2553. return false;
  2554. }
  2555. BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  2556. ++m_position;
  2557. //
  2558. // restore the flags:
  2559. //
  2560. if(restore_flags)
  2561. {
  2562. // append a case change state if we need it:
  2563. if(m_has_case_change)
  2564. {
  2565. static_cast<re_case*>(
  2566. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2567. )->icase = old_flags & regbase::icase;
  2568. }
  2569. this->flags(old_flags);
  2570. }
  2571. //
  2572. // set up the jump pointer if we have one:
  2573. //
  2574. if(jump_offset)
  2575. {
  2576. this->m_pdata->m_data.align();
  2577. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2578. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  2579. if((this->m_last_state == jmp) && (markid != -2))
  2580. {
  2581. // Oops... we didn't have anything inside the assertion.
  2582. // Note we don't get here for negated forward lookahead as (?!)
  2583. // does have some uses.
  2584. // Rewind to start of (? sequence:
  2585. --m_position;
  2586. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2587. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
  2588. return false;
  2589. }
  2590. }
  2591. //
  2592. // verify that if this is conditional expression, that we do have
  2593. // an alternative, if not add one:
  2594. //
  2595. if(markid == -4)
  2596. {
  2597. re_syntax_base* b = this->getaddress(expected_alt_point);
  2598. // Make sure we have exactly one alternative following this state:
  2599. if(b->type != syntax_element_alt)
  2600. {
  2601. re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
  2602. alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
  2603. }
  2604. else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
  2605. {
  2606. // Can't have seen more than one alternative:
  2607. // Rewind to start of (? sequence:
  2608. --m_position;
  2609. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2610. fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
  2611. return false;
  2612. }
  2613. else
  2614. {
  2615. // We must *not* have seen an alternative inside a (DEFINE) block:
  2616. b = this->getaddress(b->next.i, b);
  2617. if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
  2618. {
  2619. // Rewind to start of (? sequence:
  2620. --m_position;
  2621. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2622. fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
  2623. return false;
  2624. }
  2625. }
  2626. // check for invalid repetition of next state:
  2627. b = this->getaddress(expected_alt_point);
  2628. b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
  2629. if((b->type != syntax_element_assert_backref)
  2630. && (b->type != syntax_element_startmark))
  2631. {
  2632. // Rewind to start of (? sequence:
  2633. --m_position;
  2634. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2635. fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
  2636. return false;
  2637. }
  2638. }
  2639. //
  2640. // append closing parenthesis state:
  2641. //
  2642. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  2643. pb->index = markid;
  2644. pb->icase = this->flags() & regbase::icase;
  2645. this->m_paren_start = last_paren_start;
  2646. //
  2647. // restore the alternate insertion point:
  2648. //
  2649. this->m_alt_insert_point = last_alt_point;
  2650. //
  2651. // and the case change data:
  2652. //
  2653. m_has_case_change = old_case_change;
  2654. //
  2655. // And the mark_reset data:
  2656. //
  2657. if(m_max_mark > m_mark_count)
  2658. {
  2659. m_mark_count = m_max_mark;
  2660. }
  2661. m_mark_reset = mark_reset;
  2662. m_max_mark = max_mark;
  2663. if(markid > 0)
  2664. {
  2665. #ifndef BOOST_NO_STD_DISTANCE
  2666. if(this->flags() & regbase::save_subexpression_location)
  2667. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
  2668. #else
  2669. if(this->flags() & regbase::save_subexpression_location)
  2670. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
  2671. #endif
  2672. //
  2673. // allow backrefs to this mark:
  2674. //
  2675. if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
  2676. this->m_backrefs |= 1u << (markid - 1);
  2677. }
  2678. return true;
  2679. }
  2680. template <class charT, class traits>
  2681. bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
  2682. {
  2683. while(*verb)
  2684. {
  2685. if(static_cast<charT>(*verb) != *m_position)
  2686. {
  2687. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2688. fail(regex_constants::error_perl_extension, m_position - m_base);
  2689. return false;
  2690. }
  2691. if(++m_position == m_end)
  2692. {
  2693. --m_position;
  2694. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2695. fail(regex_constants::error_perl_extension, m_position - m_base);
  2696. return false;
  2697. }
  2698. ++verb;
  2699. }
  2700. return true;
  2701. }
  2702. template <class charT, class traits>
  2703. bool basic_regex_parser<charT, traits>::parse_perl_verb()
  2704. {
  2705. if(++m_position == m_end)
  2706. {
  2707. // Rewind to start of (* sequence:
  2708. --m_position;
  2709. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2710. fail(regex_constants::error_perl_extension, m_position - m_base);
  2711. return false;
  2712. }
  2713. switch(*m_position)
  2714. {
  2715. case 'F':
  2716. if(++m_position == m_end)
  2717. {
  2718. // Rewind to start of (* sequence:
  2719. --m_position;
  2720. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2721. fail(regex_constants::error_perl_extension, m_position - m_base);
  2722. return false;
  2723. }
  2724. if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
  2725. {
  2726. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2727. {
  2728. // Rewind to start of (* sequence:
  2729. --m_position;
  2730. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2731. fail(regex_constants::error_perl_extension, m_position - m_base);
  2732. return false;
  2733. }
  2734. ++m_position;
  2735. this->append_state(syntax_element_fail);
  2736. return true;
  2737. }
  2738. break;
  2739. case 'A':
  2740. if(++m_position == m_end)
  2741. {
  2742. // Rewind to start of (* sequence:
  2743. --m_position;
  2744. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2745. fail(regex_constants::error_perl_extension, m_position - m_base);
  2746. return false;
  2747. }
  2748. if(match_verb("CCEPT"))
  2749. {
  2750. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2751. {
  2752. // Rewind to start of (* sequence:
  2753. --m_position;
  2754. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2755. fail(regex_constants::error_perl_extension, m_position - m_base);
  2756. return false;
  2757. }
  2758. ++m_position;
  2759. this->append_state(syntax_element_accept);
  2760. return true;
  2761. }
  2762. break;
  2763. case 'C':
  2764. if(++m_position == m_end)
  2765. {
  2766. // Rewind to start of (* sequence:
  2767. --m_position;
  2768. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2769. fail(regex_constants::error_perl_extension, m_position - m_base);
  2770. return false;
  2771. }
  2772. if(match_verb("OMMIT"))
  2773. {
  2774. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2775. {
  2776. // Rewind to start of (* sequence:
  2777. --m_position;
  2778. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2779. fail(regex_constants::error_perl_extension, m_position - m_base);
  2780. return false;
  2781. }
  2782. ++m_position;
  2783. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
  2784. this->m_pdata->m_disable_match_any = true;
  2785. return true;
  2786. }
  2787. break;
  2788. case 'P':
  2789. if(++m_position == m_end)
  2790. {
  2791. // Rewind to start of (* sequence:
  2792. --m_position;
  2793. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2794. fail(regex_constants::error_perl_extension, m_position - m_base);
  2795. return false;
  2796. }
  2797. if(match_verb("RUNE"))
  2798. {
  2799. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2800. {
  2801. // Rewind to start of (* sequence:
  2802. --m_position;
  2803. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2804. fail(regex_constants::error_perl_extension, m_position - m_base);
  2805. return false;
  2806. }
  2807. ++m_position;
  2808. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
  2809. this->m_pdata->m_disable_match_any = true;
  2810. return true;
  2811. }
  2812. break;
  2813. case 'S':
  2814. if(++m_position == m_end)
  2815. {
  2816. // Rewind to start of (* sequence:
  2817. --m_position;
  2818. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2819. fail(regex_constants::error_perl_extension, m_position - m_base);
  2820. return false;
  2821. }
  2822. if(match_verb("KIP"))
  2823. {
  2824. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2825. {
  2826. // Rewind to start of (* sequence:
  2827. --m_position;
  2828. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2829. fail(regex_constants::error_perl_extension, m_position - m_base);
  2830. return false;
  2831. }
  2832. ++m_position;
  2833. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
  2834. this->m_pdata->m_disable_match_any = true;
  2835. return true;
  2836. }
  2837. break;
  2838. case 'T':
  2839. if(++m_position == m_end)
  2840. {
  2841. // Rewind to start of (* sequence:
  2842. --m_position;
  2843. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2844. fail(regex_constants::error_perl_extension, m_position - m_base);
  2845. return false;
  2846. }
  2847. if(match_verb("HEN"))
  2848. {
  2849. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2850. {
  2851. // Rewind to start of (* sequence:
  2852. --m_position;
  2853. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2854. fail(regex_constants::error_perl_extension, m_position - m_base);
  2855. return false;
  2856. }
  2857. ++m_position;
  2858. this->append_state(syntax_element_then);
  2859. this->m_pdata->m_disable_match_any = true;
  2860. return true;
  2861. }
  2862. break;
  2863. }
  2864. // Rewind to start of (* sequence:
  2865. --m_position;
  2866. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2867. fail(regex_constants::error_perl_extension, m_position - m_base);
  2868. return false;
  2869. }
  2870. template <class charT, class traits>
  2871. bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
  2872. {
  2873. //
  2874. // parses an emacs style \sx or \Sx construct.
  2875. //
  2876. if(++m_position == m_end)
  2877. {
  2878. // Rewind to start of sequence:
  2879. --m_position;
  2880. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  2881. fail(regex_constants::error_escape, m_position - m_base);
  2882. return false;
  2883. }
  2884. basic_char_set<charT, traits> char_set;
  2885. if(negate)
  2886. char_set.negate();
  2887. static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
  2888. switch(*m_position)
  2889. {
  2890. case 's':
  2891. case ' ':
  2892. char_set.add_class(this->m_mask_space);
  2893. break;
  2894. case 'w':
  2895. char_set.add_class(this->m_word_mask);
  2896. break;
  2897. case '_':
  2898. char_set.add_single(digraph<charT>(charT('$')));
  2899. char_set.add_single(digraph<charT>(charT('&')));
  2900. char_set.add_single(digraph<charT>(charT('*')));
  2901. char_set.add_single(digraph<charT>(charT('+')));
  2902. char_set.add_single(digraph<charT>(charT('-')));
  2903. char_set.add_single(digraph<charT>(charT('_')));
  2904. char_set.add_single(digraph<charT>(charT('<')));
  2905. char_set.add_single(digraph<charT>(charT('>')));
  2906. break;
  2907. case '.':
  2908. char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
  2909. break;
  2910. case '(':
  2911. char_set.add_single(digraph<charT>(charT('(')));
  2912. char_set.add_single(digraph<charT>(charT('[')));
  2913. char_set.add_single(digraph<charT>(charT('{')));
  2914. break;
  2915. case ')':
  2916. char_set.add_single(digraph<charT>(charT(')')));
  2917. char_set.add_single(digraph<charT>(charT(']')));
  2918. char_set.add_single(digraph<charT>(charT('}')));
  2919. break;
  2920. case '"':
  2921. char_set.add_single(digraph<charT>(charT('"')));
  2922. char_set.add_single(digraph<charT>(charT('\'')));
  2923. char_set.add_single(digraph<charT>(charT('`')));
  2924. break;
  2925. case '\'':
  2926. char_set.add_single(digraph<charT>(charT('\'')));
  2927. char_set.add_single(digraph<charT>(charT(',')));
  2928. char_set.add_single(digraph<charT>(charT('#')));
  2929. break;
  2930. case '<':
  2931. char_set.add_single(digraph<charT>(charT(';')));
  2932. break;
  2933. case '>':
  2934. char_set.add_single(digraph<charT>(charT('\n')));
  2935. char_set.add_single(digraph<charT>(charT('\f')));
  2936. break;
  2937. default:
  2938. fail(regex_constants::error_ctype, m_position - m_base);
  2939. return false;
  2940. }
  2941. if(0 == this->append_set(char_set))
  2942. {
  2943. fail(regex_constants::error_ctype, m_position - m_base);
  2944. return false;
  2945. }
  2946. ++m_position;
  2947. return true;
  2948. }
  2949. template <class charT, class traits>
  2950. regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
  2951. {
  2952. // we have a (?imsx-imsx) group, convert it into a set of flags:
  2953. regex_constants::syntax_option_type f = this->flags();
  2954. bool breakout = false;
  2955. do
  2956. {
  2957. switch(*m_position)
  2958. {
  2959. case 's':
  2960. f |= regex_constants::mod_s;
  2961. f &= ~regex_constants::no_mod_s;
  2962. break;
  2963. case 'm':
  2964. f &= ~regex_constants::no_mod_m;
  2965. break;
  2966. case 'i':
  2967. f |= regex_constants::icase;
  2968. break;
  2969. case 'x':
  2970. f |= regex_constants::mod_x;
  2971. break;
  2972. default:
  2973. breakout = true;
  2974. continue;
  2975. }
  2976. if(++m_position == m_end)
  2977. {
  2978. // Rewind to start of (? sequence:
  2979. --m_position;
  2980. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2981. fail(regex_constants::error_paren, m_position - m_base);
  2982. return false;
  2983. }
  2984. }
  2985. while(!breakout);
  2986. breakout = false;
  2987. if(*m_position == static_cast<charT>('-'))
  2988. {
  2989. if(++m_position == m_end)
  2990. {
  2991. // Rewind to start of (? sequence:
  2992. --m_position;
  2993. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2994. fail(regex_constants::error_paren, m_position - m_base);
  2995. return false;
  2996. }
  2997. do
  2998. {
  2999. switch(*m_position)
  3000. {
  3001. case 's':
  3002. f &= ~regex_constants::mod_s;
  3003. f |= regex_constants::no_mod_s;
  3004. break;
  3005. case 'm':
  3006. f |= regex_constants::no_mod_m;
  3007. break;
  3008. case 'i':
  3009. f &= ~regex_constants::icase;
  3010. break;
  3011. case 'x':
  3012. f &= ~regex_constants::mod_x;
  3013. break;
  3014. default:
  3015. breakout = true;
  3016. continue;
  3017. }
  3018. if(++m_position == m_end)
  3019. {
  3020. // Rewind to start of (? sequence:
  3021. --m_position;
  3022. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3023. fail(regex_constants::error_paren, m_position - m_base);
  3024. return false;
  3025. }
  3026. }
  3027. while(!breakout);
  3028. }
  3029. return f;
  3030. }
  3031. template <class charT, class traits>
  3032. bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
  3033. {
  3034. //
  3035. // If we didn't actually add any states after the last
  3036. // alternative then that's an error:
  3037. //
  3038. if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
  3039. && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
  3040. &&
  3041. !(
  3042. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  3043. &&
  3044. ((this->flags() & regbase::no_empty_expressions) == 0)
  3045. )
  3046. )
  3047. {
  3048. fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
  3049. return false;
  3050. }
  3051. //
  3052. // Fix up our alternatives:
  3053. //
  3054. while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
  3055. {
  3056. //
  3057. // fix up the jump to point to the end of the states
  3058. // that we've just added:
  3059. //
  3060. std::ptrdiff_t jump_offset = m_alt_jumps.back();
  3061. m_alt_jumps.pop_back();
  3062. this->m_pdata->m_data.align();
  3063. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  3064. BOOST_ASSERT(jmp->type == syntax_element_jump);
  3065. jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
  3066. }
  3067. return true;
  3068. }
  3069. #ifdef BOOST_MSVC
  3070. #pragma warning(pop)
  3071. #endif
  3072. } // namespace BOOST_REGEX_DETAIL_NS
  3073. } // namespace boost
  3074. #ifdef BOOST_MSVC
  3075. #pragma warning(push)
  3076. #pragma warning(disable: 4103)
  3077. #endif
  3078. #ifdef BOOST_HAS_ABI_HEADERS
  3079. # include BOOST_ABI_SUFFIX
  3080. #endif
  3081. #ifdef BOOST_MSVC
  3082. #pragma warning(pop)
  3083. #endif
  3084. #endif