lexertl_lexer.hpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. http://www.boost.org/
  4. Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
  5. Software License, Version 1.0. (See accompanying file
  6. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. =============================================================================*/
  8. #if !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
  9. #define BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED
  10. #include <fstream>
  11. #include <boost/iterator/iterator_traits.hpp>
  12. #include <boost/wave/wave_config.hpp>
  13. #include <boost/wave/language_support.hpp>
  14. #include <boost/wave/token_ids.hpp>
  15. #include <boost/wave/util/time_conversion_helper.hpp>
  16. #include <boost/wave/cpplexer/validate_universal_char.hpp>
  17. #include <boost/wave/cpplexer/convert_trigraphs.hpp>
  18. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  19. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  20. #include <boost/wave/cpplexer/detect_include_guards.hpp>
  21. #endif
  22. #include "wave_lexertl_config.hpp"
  23. #include "../lexertl_iterator.hpp"
  24. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
  25. #include "wave_lexertl_tables.hpp"
  26. #else
  27. #include <boost/spirit/home/support/detail/lexer/generator.hpp>
  28. #include <boost/spirit/home/support/detail/lexer/rules.hpp>
  29. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  30. #include <boost/spirit/home/support/detail/lexer/consts.hpp>
  31. //#include "lexertl/examples/serialise.hpp>
  32. // #if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
  33. // #include "lexertl/examples/cpp_code.hpp"
  34. // #endif
  35. #endif
  36. ///////////////////////////////////////////////////////////////////////////////
  37. namespace boost { namespace wave { namespace cpplexer { namespace lexertl
  38. {
  #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  ///////////////////////////////////////////////////////////////////////////////
  //  Explicit array sizes of the token regex tables defined further down. They
  //  have to be spelled out to make the CW compiler happy (at least up to V9.5).
  //  Every size includes the terminating sentinel entry and has to follow the
  //  conditionally compiled entries of the table it belongs to.
  #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  #define INIT_DATA_SIZE              176   // common patterns + MS extensions
  #else
  #define INIT_DATA_SIZE              159   // common patterns
  #endif
  //  The C++ only table holds one additional entry (T_IMPORT) if the 'import'
  //  keyword is supported; a fixed size of 15 would be one element too small.
  #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
  #define INIT_DATA_CPP_SIZE          16
  #else
  #define INIT_DATA_CPP_SIZE          15
  #endif
  #define INIT_DATA_PP_NUMBER_SIZE    2
  #define INIT_MACRO_DATA_SIZE        27
  #endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  52. // this is just a hack to have a unique token id not otherwise used by Wave
  53. #define T_ANYCTRL T_LAST_TOKEN_ID
  54. ///////////////////////////////////////////////////////////////////////////////
  55. namespace lexer
  56. {
  57. ///////////////////////////////////////////////////////////////////////////////
  58. // this is the wrapper for the lexertl lexer library
  59. template <typename Iterator, typename Position>
  60. class lexertl
  61. {
  62. private:
  63. typedef BOOST_WAVE_STRINGTYPE string_type;
  64. typedef typename boost::detail::iterator_traits<Iterator>::value_type
  65. char_type;
  66. public:
  67. wave::token_id next_token(Iterator &first, Iterator const &last,
  68. string_type& token_value);
  69. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
  70. lexertl() {}
  71. void init_dfa(wave::language_support lang, Position const& pos,
  72. bool force_reinit = false) {}
  73. bool is_initialized() const { return true; }
  74. #else
  75. lexertl() : has_compiled_dfa_(false) {}
  76. bool init_dfa(wave::language_support lang, Position const& pos,
  77. bool force_reinit = false);
  78. bool is_initialized() const { return has_compiled_dfa_; }
  79. // get time of last compilation
  80. static std::time_t get_compilation_time()
  81. { return compilation_time.get_time(); }
  82. bool load (std::istream& instrm);
  83. bool save (std::ostream& outstrm);
  84. private:
  85. boost::lexer::state_machine state_machine_;
  86. bool has_compiled_dfa_;
  87. // initialization data (regular expressions for the token definitions)
  88. struct lexer_macro_data {
  89. char_type const *name; // macro name
  90. char_type const *macro; // associated macro definition
  91. };
  92. static lexer_macro_data const init_macro_data[INIT_MACRO_DATA_SIZE]; // macro patterns
  93. struct lexer_data {
  94. token_id tokenid; // token data
  95. char_type const *tokenregex; // associated token to match
  96. };
  97. static lexer_data const init_data[INIT_DATA_SIZE]; // common patterns
  98. static lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns
  99. static lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns
  100. // helper for calculation of the time of last compilation
  101. static boost::wave::util::time_conversion_helper compilation_time;
  102. #endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  103. };
  104. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  105. ///////////////////////////////////////////////////////////////////////////////
  106. // get time of last compilation of this file
  107. template <typename IteratorT, typename PositionT>
  108. boost::wave::util::time_conversion_helper
  109. lexertl<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
  110. ///////////////////////////////////////////////////////////////////////////////
  111. // token regex definitions
  112. // helper for initializing token data and macro definitions
  113. #define Q(c) "\\" c
  114. #define TRI(c) "{TRI}" c
  115. #define OR "|"
  116. #define MACRO_DATA(name, macro) { name, macro }
  117. #define TOKEN_DATA(id, regex) { id, regex }
  118. // lexertl macro definitions
  119. template <typename Iterator, typename Position>
  120. typename lexertl<Iterator, Position>::lexer_macro_data const
  121. lexertl<Iterator, Position>::init_macro_data[INIT_MACRO_DATA_SIZE] =
  122. {
  123. MACRO_DATA("ANY", "[\t\v\f\r\n\\040-\\377]"),
  124. MACRO_DATA("ANYCTRL", "[\\000-\\037]"),
  125. MACRO_DATA("TRI", "\\?\\?"),
  126. MACRO_DATA("BLANK", "[ \t\v\f]"),
  127. MACRO_DATA("CCOMMENT", "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"),
  128. MACRO_DATA("PPSPACE", "(" "{BLANK}" OR "{CCOMMENT}" ")*"),
  129. MACRO_DATA("OCTALDIGIT", "[0-7]"),
  130. MACRO_DATA("DIGIT", "[0-9]"),
  131. MACRO_DATA("HEXDIGIT", "[0-9a-fA-F]"),
  132. MACRO_DATA("OPTSIGN", "[-+]?"),
  133. MACRO_DATA("EXPSTART", "[eE][-+]"),
  134. MACRO_DATA("EXPONENT", "([eE]{OPTSIGN}{DIGIT}+)"),
  135. MACRO_DATA("NONDIGIT", "[a-zA-Z_]"),
  136. MACRO_DATA("INTEGER", "(" "(0x|0X){HEXDIGIT}+" OR "0{OCTALDIGIT}*" OR "[1-9]{DIGIT}*" ")"),
  137. MACRO_DATA("INTEGER_SUFFIX", "(" "[uU][lL]?" OR "[lL][uU]?" ")"),
  138. #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  139. MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?|i64)"),
  140. #else
  141. MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?)"),
  142. #endif
  143. MACRO_DATA("FLOAT_SUFFIX", "(" "[fF][lL]?" OR "[lL][fF]?" ")"),
  144. MACRO_DATA("CHAR_SPEC", "L?"),
  145. MACRO_DATA("BACKSLASH", "(" Q("\\") OR TRI(Q("/")) ")"),
  146. MACRO_DATA("ESCAPESEQ", "{BACKSLASH}([abfnrtv?'\"]|{BACKSLASH}|x{HEXDIGIT}+|{OCTALDIGIT}{1,3})"),
  147. MACRO_DATA("HEXQUAD", "{HEXDIGIT}{4}"),
  148. MACRO_DATA("UNIVERSALCHAR", "{BACKSLASH}(u{HEXQUAD}|U{HEXQUAD}{2})"),
  149. MACRO_DATA("POUNDDEF", "(" "#" OR TRI("=") OR Q("%:") ")"),
  150. MACRO_DATA("NEWLINEDEF", "(" "\\n" OR "\\r" OR "\\r\\n" ")"),
  151. #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
  152. MACRO_DATA("INCLUDEDEF", "(include|include_next)"),
  153. #else
  154. MACRO_DATA("INCLUDEDEF", "include"),
  155. #endif
  156. MACRO_DATA("PP_NUMBERDEF", "\\.?{DIGIT}({DIGIT}|{NONDIGIT}|{EXPSTART}|\\.)*"),
  157. MACRO_DATA(NULL, NULL) // should be the last entry
  158. };
  159. // common C++/C99 token definitions
  160. template <typename Iterator, typename Position>
  161. typename lexertl<Iterator, Position>::lexer_data const
  162. lexertl<Iterator, Position>::init_data[INIT_DATA_SIZE] =
  163. {
  164. TOKEN_DATA(T_AND, "&"),
  165. TOKEN_DATA(T_ANDAND, "&&"),
  166. TOKEN_DATA(T_ASSIGN, "="),
  167. TOKEN_DATA(T_ANDASSIGN, "&="),
  168. TOKEN_DATA(T_OR, Q("|")),
  169. TOKEN_DATA(T_OR_TRIGRAPH, "{TRI}!"),
  170. TOKEN_DATA(T_ORASSIGN, Q("|=")),
  171. TOKEN_DATA(T_ORASSIGN_TRIGRAPH, "{TRI}!="),
  172. TOKEN_DATA(T_XOR, Q("^")),
  173. TOKEN_DATA(T_XOR_TRIGRAPH, "{TRI}'"),
  174. TOKEN_DATA(T_XORASSIGN, Q("^=")),
  175. TOKEN_DATA(T_XORASSIGN_TRIGRAPH, "{TRI}'="),
  176. TOKEN_DATA(T_COMMA, ","),
  177. TOKEN_DATA(T_COLON, ":"),
  178. TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
  179. TOKEN_DATA(T_DIVIDE, Q("/")),
  180. TOKEN_DATA(T_DOT, Q(".")),
  181. TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"),
  182. TOKEN_DATA(T_EQUAL, "=="),
  183. TOKEN_DATA(T_GREATER, ">"),
  184. TOKEN_DATA(T_GREATEREQUAL, ">="),
  185. TOKEN_DATA(T_LEFTBRACE, Q("{")),
  186. TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
  187. TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, "{TRI}<"),
  188. TOKEN_DATA(T_LESS, "<"),
  189. TOKEN_DATA(T_LESSEQUAL, "<="),
  190. TOKEN_DATA(T_LEFTPAREN, Q("(")),
  191. TOKEN_DATA(T_LEFTBRACKET, Q("[")),
  192. TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
  193. TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")),
  194. TOKEN_DATA(T_MINUS, Q("-")),
  195. TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
  196. TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"),
  197. TOKEN_DATA(T_PERCENT, Q("%")),
  198. TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
  199. TOKEN_DATA(T_NOT, "!"),
  200. TOKEN_DATA(T_NOTEQUAL, "!="),
  201. TOKEN_DATA(T_OROR, Q("|") "{2}"),
  202. TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"),
  203. TOKEN_DATA(T_PLUS, Q("+")),
  204. TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
  205. TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"),
  206. TOKEN_DATA(T_ARROW, Q("->")),
  207. TOKEN_DATA(T_QUESTION_MARK, Q("?")),
  208. TOKEN_DATA(T_RIGHTBRACE, Q("}")),
  209. TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
  210. TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, "{TRI}>"),
  211. TOKEN_DATA(T_RIGHTPAREN, Q(")")),
  212. TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
  213. TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
  214. TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, "{TRI}" Q(")")),
  215. TOKEN_DATA(T_SEMICOLON, ";"),
  216. TOKEN_DATA(T_SHIFTLEFT, "<<"),
  217. TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
  218. TOKEN_DATA(T_SHIFTRIGHT, ">>"),
  219. TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
  220. TOKEN_DATA(T_STAR, Q("*")),
  221. TOKEN_DATA(T_COMPL, Q("~")),
  222. TOKEN_DATA(T_COMPL_TRIGRAPH, "{TRI}-"),
  223. TOKEN_DATA(T_STARASSIGN, Q("*=")),
  224. TOKEN_DATA(T_ASM, "asm"),
  225. TOKEN_DATA(T_AUTO, "auto"),
  226. TOKEN_DATA(T_BOOL, "bool"),
  227. TOKEN_DATA(T_FALSE, "false"),
  228. TOKEN_DATA(T_TRUE, "true"),
  229. TOKEN_DATA(T_BREAK, "break"),
  230. TOKEN_DATA(T_CASE, "case"),
  231. TOKEN_DATA(T_CATCH, "catch"),
  232. TOKEN_DATA(T_CHAR, "char"),
  233. TOKEN_DATA(T_CLASS, "class"),
  234. TOKEN_DATA(T_CONST, "const"),
  235. TOKEN_DATA(T_CONSTCAST, "const_cast"),
  236. TOKEN_DATA(T_CONTINUE, "continue"),
  237. TOKEN_DATA(T_DEFAULT, "default"),
  238. TOKEN_DATA(T_DELETE, "delete"),
  239. TOKEN_DATA(T_DO, "do"),
  240. TOKEN_DATA(T_DOUBLE, "double"),
  241. TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
  242. TOKEN_DATA(T_ELSE, "else"),
  243. TOKEN_DATA(T_ENUM, "enum"),
  244. TOKEN_DATA(T_EXPLICIT, "explicit"),
  245. TOKEN_DATA(T_EXPORT, "export"),
  246. TOKEN_DATA(T_EXTERN, "extern"),
  247. TOKEN_DATA(T_FLOAT, "float"),
  248. TOKEN_DATA(T_FOR, "for"),
  249. TOKEN_DATA(T_FRIEND, "friend"),
  250. TOKEN_DATA(T_GOTO, "goto"),
  251. TOKEN_DATA(T_IF, "if"),
  252. TOKEN_DATA(T_INLINE, "inline"),
  253. TOKEN_DATA(T_INT, "int"),
  254. TOKEN_DATA(T_LONG, "long"),
  255. TOKEN_DATA(T_MUTABLE, "mutable"),
  256. TOKEN_DATA(T_NAMESPACE, "namespace"),
  257. TOKEN_DATA(T_NEW, "new"),
  258. TOKEN_DATA(T_OPERATOR, "operator"),
  259. TOKEN_DATA(T_PRIVATE, "private"),
  260. TOKEN_DATA(T_PROTECTED, "protected"),
  261. TOKEN_DATA(T_PUBLIC, "public"),
  262. TOKEN_DATA(T_REGISTER, "register"),
  263. TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
  264. TOKEN_DATA(T_RETURN, "return"),
  265. TOKEN_DATA(T_SHORT, "short"),
  266. TOKEN_DATA(T_SIGNED, "signed"),
  267. TOKEN_DATA(T_SIZEOF, "sizeof"),
  268. TOKEN_DATA(T_STATIC, "static"),
  269. TOKEN_DATA(T_STATICCAST, "static_cast"),
  270. TOKEN_DATA(T_STRUCT, "struct"),
  271. TOKEN_DATA(T_SWITCH, "switch"),
  272. TOKEN_DATA(T_TEMPLATE, "template"),
  273. TOKEN_DATA(T_THIS, "this"),
  274. TOKEN_DATA(T_THROW, "throw"),
  275. TOKEN_DATA(T_TRY, "try"),
  276. TOKEN_DATA(T_TYPEDEF, "typedef"),
  277. TOKEN_DATA(T_TYPEID, "typeid"),
  278. TOKEN_DATA(T_TYPENAME, "typename"),
  279. TOKEN_DATA(T_UNION, "union"),
  280. TOKEN_DATA(T_UNSIGNED, "unsigned"),
  281. TOKEN_DATA(T_USING, "using"),
  282. TOKEN_DATA(T_VIRTUAL, "virtual"),
  283. TOKEN_DATA(T_VOID, "void"),
  284. TOKEN_DATA(T_VOLATILE, "volatile"),
  285. TOKEN_DATA(T_WCHART, "wchar_t"),
  286. TOKEN_DATA(T_WHILE, "while"),
  287. TOKEN_DATA(T_PP_DEFINE, "{POUNDDEF}{PPSPACE}define"),
  288. TOKEN_DATA(T_PP_IF, "{POUNDDEF}{PPSPACE}if"),
  289. TOKEN_DATA(T_PP_IFDEF, "{POUNDDEF}{PPSPACE}ifdef"),
  290. TOKEN_DATA(T_PP_IFNDEF, "{POUNDDEF}{PPSPACE}ifndef"),
  291. TOKEN_DATA(T_PP_ELSE, "{POUNDDEF}{PPSPACE}else"),
  292. TOKEN_DATA(T_PP_ELIF, "{POUNDDEF}{PPSPACE}elif"),
  293. TOKEN_DATA(T_PP_ENDIF, "{POUNDDEF}{PPSPACE}endif"),
  294. TOKEN_DATA(T_PP_ERROR, "{POUNDDEF}{PPSPACE}error"),
  295. TOKEN_DATA(T_PP_QHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" Q("\"") "[^\\n\\r\"]+" Q("\"")),
  296. TOKEN_DATA(T_PP_HHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" "<" "[^\\n\\r>]+" ">"),
  297. TOKEN_DATA(T_PP_INCLUDE, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}"),
  298. TOKEN_DATA(T_PP_LINE, "{POUNDDEF}{PPSPACE}line"),
  299. TOKEN_DATA(T_PP_PRAGMA, "{POUNDDEF}{PPSPACE}pragma"),
  300. TOKEN_DATA(T_PP_UNDEF, "{POUNDDEF}{PPSPACE}undef"),
  301. TOKEN_DATA(T_PP_WARNING, "{POUNDDEF}{PPSPACE}warning"),
  302. #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  303. TOKEN_DATA(T_MSEXT_INT8, "__int8"),
  304. TOKEN_DATA(T_MSEXT_INT16, "__int16"),
  305. TOKEN_DATA(T_MSEXT_INT32, "__int32"),
  306. TOKEN_DATA(T_MSEXT_INT64, "__int64"),
  307. TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
  308. TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
  309. TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
  310. TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
  311. TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
  312. TOKEN_DATA(T_MSEXT_TRY , "__try"),
  313. TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
  314. TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
  315. TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
  316. TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
  317. TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
  318. TOKEN_DATA(T_MSEXT_PP_REGION, "{POUNDDEF}{PPSPACE}region"),
  319. TOKEN_DATA(T_MSEXT_PP_ENDREGION, "{POUNDDEF}{PPSPACE}endregion"),
  320. #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  321. TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"),
  322. TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"),
  323. TOKEN_DATA(T_FLOATLIT,
  324. "(" "{DIGIT}*" Q(".") "{DIGIT}+" OR "{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?" OR
  325. "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),
  326. #if BOOST_WAVE_USE_STRICT_LEXER != 0
  327. TOKEN_DATA(T_IDENTIFIER,
  328. "(" "{NONDIGIT}" OR "{UNIVERSALCHAR}" ")"
  329. "(" "{NONDIGIT}" OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
  330. #else
  331. TOKEN_DATA(T_IDENTIFIER,
  332. "(" "{NONDIGIT}" OR Q("$") OR "{UNIVERSALCHAR}" ")"
  333. "(" "{NONDIGIT}" OR Q("$") OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
  334. #endif
  335. TOKEN_DATA(T_CCOMMENT, "{CCOMMENT}"),
  336. TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") "{NEWLINEDEF}" ),
  337. TOKEN_DATA(T_CHARLIT,
  338. "{CHAR_SPEC}" "'" "({ESCAPESEQ}|[^\\n\\r']|{UNIVERSALCHAR})+" "'"),
  339. TOKEN_DATA(T_STRINGLIT,
  340. "{CHAR_SPEC}" Q("\"") "({ESCAPESEQ}|[^\\n\\r\"]|{UNIVERSALCHAR})*" Q("\"")),
  341. TOKEN_DATA(T_SPACE, "{BLANK}+"),
  342. TOKEN_DATA(T_CONTLINE, Q("\\") "\\n"),
  343. TOKEN_DATA(T_NEWLINE, "{NEWLINEDEF}"),
  344. TOKEN_DATA(T_POUND_POUND, "##"),
  345. TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
  346. TOKEN_DATA(T_POUND_POUND_TRIGRAPH, "({TRI}=){2}"),
  347. TOKEN_DATA(T_POUND, "#"),
  348. TOKEN_DATA(T_POUND_ALT, Q("%:")),
  349. TOKEN_DATA(T_POUND_TRIGRAPH, "{TRI}="),
  350. TOKEN_DATA(T_ANY_TRIGRAPH, "{TRI}\\/"),
  351. TOKEN_DATA(T_ANY, "{ANY}"),
  352. TOKEN_DATA(T_ANYCTRL, "{ANYCTRL}"), // this should be the last recognized token
  353. { token_id(0) } // this should be the last entry
  354. };
  355. // C++ only token definitions
  356. template <typename Iterator, typename Position>
  357. typename lexertl<Iterator, Position>::lexer_data const
  358. lexertl<Iterator, Position>::init_data_cpp[INIT_DATA_CPP_SIZE] =
  359. {
  360. TOKEN_DATA(T_AND_ALT, "bitand"),
  361. TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
  362. TOKEN_DATA(T_ANDAND_ALT, "and"),
  363. TOKEN_DATA(T_OR_ALT, "bitor"),
  364. TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
  365. TOKEN_DATA(T_OROR_ALT, "or"),
  366. TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
  367. TOKEN_DATA(T_XOR_ALT, "xor"),
  368. TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
  369. TOKEN_DATA(T_NOT_ALT, "not"),
  370. TOKEN_DATA(T_COMPL_ALT, "compl"),
  371. #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
  372. TOKEN_DATA(T_IMPORT, "import"),
  373. #endif
  374. TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
  375. TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
  376. TOKEN_DATA(T_COLON_COLON, "::"),
  377. { token_id(0) } // this should be the last entry
  378. };
  379. // pp-number specific token definitions
  380. template <typename Iterator, typename Position>
  381. typename lexertl<Iterator, Position>::lexer_data const
  382. lexertl<Iterator, Position>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
  383. {
  384. TOKEN_DATA(T_PP_NUMBER, "{PP_NUMBERDEF}"),
  385. { token_id(0) } // this should be the last entry
  386. };
  387. #undef MACRO_DATA
  388. #undef TOKEN_DATA
  389. #undef OR
  390. #undef TRI
  391. #undef Q
  392. ///////////////////////////////////////////////////////////////////////////////
  393. // initialize lexertl lexer from C++ token regex's
  394. template <typename Iterator, typename Position>
  395. inline bool
  396. lexertl<Iterator, Position>::init_dfa(wave::language_support lang,
  397. Position const& pos, bool force_reinit)
  398. {
  399. if (has_compiled_dfa_)
  400. return true;
  401. std::ifstream dfa_in("wave_lexertl_lexer.dfa", std::ios::in|std::ios::binary);
  402. if (force_reinit || !dfa_in.is_open() || !load (dfa_in))
  403. {
  404. dfa_in.close();
  405. state_machine_.clear();
  406. // register macro definitions
  407. boost::lexer::rules rules;
  408. for (int k = 0; NULL != init_macro_data[k].name; ++k) {
  409. rules.add_macro(init_macro_data[k].name, init_macro_data[k].macro);
  410. }
  411. // if pp-numbers should be preferred, insert the corresponding rule first
  412. if (wave::need_prefer_pp_numbers(lang)) {
  413. for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
  414. rules.add(init_data_pp_number[j].tokenregex,
  415. init_data_pp_number[j].tokenid);
  416. }
  417. }
  418. // if in C99 mode, some of the keywords are not valid
  419. if (!wave::need_c99(lang)) {
  420. for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
  421. rules.add(init_data_cpp[j].tokenregex,
  422. init_data_cpp[j].tokenid);
  423. }
  424. }
  425. for (int i = 0; 0 != init_data[i].tokenid; ++i) {
  426. rules.add(init_data[i].tokenregex, init_data[i].tokenid);
  427. }
  428. // generate minimized DFA
  429. try {
  430. boost::lexer::generator::build (rules, state_machine_);
  431. boost::lexer::generator::minimise (state_machine_);
  432. }
  433. catch (std::runtime_error const& e) {
  434. string_type msg("lexertl initialization error: ");
  435. msg += e.what();
  436. BOOST_WAVE_LEXER_THROW(wave::cpplexer::lexing_exception,
  437. unexpected_error, msg.c_str(),
  438. pos.get_line(), pos.get_column(), pos.get_file().c_str());
  439. return false;
  440. }
  441. std::ofstream dfa_out ("wave_lexertl_lexer.dfa",
  442. std::ios::out|std::ios::binary|std::ios::trunc);
  443. if (dfa_out.is_open())
  444. save (dfa_out);
  445. }
  446. has_compiled_dfa_ = true;
  447. return true;
  448. }
  449. #endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  450. ///////////////////////////////////////////////////////////////////////////////
  451. // return next token from the input stream
  452. template <typename Iterator, typename Position>
  453. inline wave::token_id
  454. lexertl<Iterator, Position>::next_token(Iterator &first, Iterator const &last,
  455. string_type& token_value)
  456. {
  457. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  458. size_t const* const lookup = &state_machine_.data()._lookup[0]->front ();
  459. size_t const dfa_alphabet = state_machine_.data()._dfa_alphabet[0];
  460. size_t const* dfa = &state_machine_.data()._dfa[0]->front();
  461. size_t const* ptr = dfa + dfa_alphabet + boost::lexer::dfa_offset;
  462. #else
  463. const std::size_t *ptr = dfa + dfa_offset;
  464. #endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  465. Iterator curr = first;
  466. Iterator end_token = first;
  467. bool end_state = (*ptr != 0);
  468. size_t id = *(ptr + 1);
  469. while (curr != last) {
  470. size_t const state = ptr[lookup[int(*curr)]];
  471. if (0 == state)
  472. break;
  473. ++curr;
  474. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  475. ptr = &dfa[state * (dfa_alphabet + boost::lexer::dfa_offset)];
  476. #else
  477. ptr = &dfa[state * dfa_offset];
  478. #endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  479. if (0 != *ptr) {
  480. end_state = true;
  481. id = *(ptr + 1);
  482. end_token = curr;
  483. }
  484. }
  485. if (end_state) {
  486. if (T_ANY == id) {
  487. id = TOKEN_FROM_ID(*first, UnknownTokenType);
  488. }
  489. // return longest match
  490. string_type str(first, end_token);
  491. token_value.swap(str);
  492. first = end_token;
  493. return wave::token_id(id);
  494. }
  495. return T_EOF;
  496. }
  497. #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  498. ///////////////////////////////////////////////////////////////////////////////
  499. // load the DFA tables to/from a stream
  500. template <typename Iterator, typename Position>
  501. inline bool
  502. lexertl<Iterator, Position>::load (std::istream& instrm)
  503. {
  504. // #if !defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
  505. // std::size_t version = 0;
  506. // boost::lexer::serialise::load_as_binary(instrm, state_machine_, version);
  507. // if (version != (std::size_t)get_compilation_time())
  508. // return false; // too new for us
  509. // return instrm.good();
  510. // #else
  511. return false; // always create the dfa when generating the C++ code
  512. // #endif
  513. }
  514. ///////////////////////////////////////////////////////////////////////////////
  515. // save the DFA tables to/from a stream
  516. template <typename Iterator, typename Position>
  517. inline bool
  518. lexertl<Iterator, Position>::save (std::ostream& outstrm)
  519. {
  520. // #if defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
  521. // cpp_code::generate(state_machine_, outstrm);
  522. // #else
  523. // boost::lexer::serialise::save_as_binary(state_machine_, outstrm,
  524. // (std::size_t)get_compilation_time());
  525. // #endif
  526. return outstrm.good();
  527. }
  528. #endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
  529. ///////////////////////////////////////////////////////////////////////////////
  530. } // namespace lexer
  531. ///////////////////////////////////////////////////////////////////////////////
  532. template <typename Iterator, typename Position = wave::util::file_position_type>
  533. class lexertl_functor
  534. : public lexertl_input_interface<wave::cpplexer::lex_token<Position> >
  535. {
  536. public:
  537. typedef wave::util::position_iterator<Iterator, Position> iterator_type;
  538. typedef typename boost::detail::iterator_traits<Iterator>::value_type
  539. char_type;
  540. typedef BOOST_WAVE_STRINGTYPE string_type;
  541. typedef wave::cpplexer::lex_token<Position> token_type;
  542. lexertl_functor(Iterator const &first_, Iterator const &last_,
  543. Position const &pos_, wave::language_support language)
  544. : first(first_, last_, pos_), language(language), at_eof(false)
  545. {
  546. lexer_.init_dfa(language, pos_);
  547. }
  548. ~lexertl_functor() {}
  549. // get the next token from the input stream
  550. token_type& get(token_type& result)
  551. {
  552. if (lexer_.is_initialized() && !at_eof) {
  553. do {
  554. // generate and return the next token
  555. string_type token_val;
  556. Position pos = first.get_position(); // begin of token position
  557. wave::token_id id = lexer_.next_token(first, last, token_val);
  558. if (T_CONTLINE != id) {
  559. // The cast should avoid spurious warnings about missing case labels
  560. // for the other token ids's.
  561. switch (id) {
  562. case T_IDENTIFIER:
  563. // test identifier characters for validity (throws if
  564. // invalid chars found)
  565. if (!wave::need_no_character_validation(language)) {
  566. using wave::cpplexer::impl::validate_identifier_name;
  567. validate_identifier_name(token_val,
  568. pos.get_line(), pos.get_column(), pos.get_file());
  569. }
  570. break;
  571. case T_STRINGLIT:
  572. case T_CHARLIT:
  573. // test literal characters for validity (throws if invalid
  574. // chars found)
  575. if (wave::need_convert_trigraphs(language)) {
  576. using wave::cpplexer::impl::convert_trigraphs;
  577. token_val = convert_trigraphs(token_val);
  578. }
  579. if (!wave::need_no_character_validation(language)) {
  580. using wave::cpplexer::impl::validate_literal;
  581. validate_literal(token_val,
  582. pos.get_line(), pos.get_column(), pos.get_file());
  583. }
  584. break;
  585. case T_LONGINTLIT: // supported in C99 and long_long mode
  586. if (!wave::need_long_long(language)) {
  587. // syntax error: not allowed in C++ mode
  588. BOOST_WAVE_LEXER_THROW(
  589. wave::cpplexer::lexing_exception,
  590. invalid_long_long_literal, token_val.c_str(),
  591. pos.get_line(), pos.get_column(),
  592. pos.get_file().c_str());
  593. }
  594. break;
  595. #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
  596. case T_PP_HHEADER:
  597. case T_PP_QHEADER:
  598. case T_PP_INCLUDE:
  599. // convert to the corresponding ..._next token, if appropriate
  600. {
  601. // Skip '#' and whitespace and see whether we find an
  602. // 'include_next' here.
  603. typename string_type::size_type start = token_val.find("include");
  604. if (0 == token_val.compare(start, 12, "include_next", 12))
  605. id = token_id(id | AltTokenType);
  606. }
  607. break;
  608. #endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
  609. case T_EOF:
  610. // T_EOF is returned as a valid token, the next call will
  611. // return T_EOI, i.e. the actual end of input
  612. at_eof = true;
  613. token_val.clear();
  614. break;
  615. case T_OR_TRIGRAPH:
  616. case T_XOR_TRIGRAPH:
  617. case T_LEFTBRACE_TRIGRAPH:
  618. case T_RIGHTBRACE_TRIGRAPH:
  619. case T_LEFTBRACKET_TRIGRAPH:
  620. case T_RIGHTBRACKET_TRIGRAPH:
  621. case T_COMPL_TRIGRAPH:
  622. case T_POUND_TRIGRAPH:
  623. case T_ANY_TRIGRAPH:
  624. if (wave::need_convert_trigraphs(language))
  625. {
  626. using wave::cpplexer::impl::convert_trigraph;
  627. token_val = convert_trigraph(token_val);
  628. }
  629. break;
  630. case T_ANYCTRL:
  631. // matched some unexpected character
  632. {
  633. // 21 is the max required size for a 64 bit integer
  634. // represented as a string
  635. char buffer[22];
  636. string_type msg("invalid character in input stream: '0x");
  637. // for some systems sprintf is in namespace std
  638. using namespace std;
  639. sprintf(buffer, "%02x'", token_val[0]);
  640. msg += buffer;
  641. BOOST_WAVE_LEXER_THROW(
  642. wave::cpplexer::lexing_exception,
  643. generic_lexing_error,
  644. msg.c_str(), pos.get_line(), pos.get_column(),
  645. pos.get_file().c_str());
  646. }
  647. break;
  648. }
  649. result = token_type(id, token_val, pos);
  650. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  651. return guards.detect_guard(result);
  652. #else
  653. return result;
  654. #endif
  655. }
  656. } while (true); // skip the T_CONTLINE token
  657. }
  658. return result = token_type(); // return T_EOI
  659. }
  660. void set_position(Position const &pos)
  661. {
  662. // set position has to change the file name and line number only
  663. first.get_position().set_file(pos.get_file());
  664. first.get_position().set_line(pos.get_line());
  665. }
  666. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  667. bool has_include_guards(std::string& guard_name) const
  668. { return guards.detected(guard_name); }
  669. #endif
  670. private:
  671. iterator_type first;
  672. iterator_type last;
  673. wave::language_support language;
  674. bool at_eof;
  675. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  676. include_guards<token_type> guards;
  677. #endif
  678. static lexer::lexertl<iterator_type, Position> lexer_;
  679. };
  680. template <typename Iterator, typename Position>
  681. lexer::lexertl<
  682. typename lexertl_functor<Iterator, Position>::iterator_type, Position>
  683. lexertl_functor<Iterator, Position>::lexer_;
  684. #undef INIT_DATA_SIZE
  685. #undef INIT_DATA_CPP_SIZE
  686. #undef INIT_DATA_PP_NUMBER_SIZE
  687. #undef INIT_MACRO_DATA_SIZE
  688. #undef T_ANYCTRL
  689. ///////////////////////////////////////////////////////////////////////////////
  690. //
  691. // The new_lexer_gen<>::new_lexer function (declared in lexertl_interface.hpp)
  692. // should be defined inline, if the lex_functor shouldn't be instantiated
  693. // separately from the lex_iterator.
  694. //
  695. // Separate (explicit) instantiation helps to reduce compilation time.
  696. //
  697. ///////////////////////////////////////////////////////////////////////////////
  698. #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
  699. #define BOOST_WAVE_FLEX_NEW_LEXER_INLINE
  700. #else
  701. #define BOOST_WAVE_FLEX_NEW_LEXER_INLINE inline
  702. #endif
  703. ///////////////////////////////////////////////////////////////////////////////
  704. //
  705. // The 'new_lexer' function allows the opaque generation of a new lexer object.
  706. // It is coupled to the iterator type to allow to decouple the lexer/iterator
  707. // configurations at compile time.
  708. //
  709. // This function is declared inside the xlex_interface.hpp file, which is
  710. // referenced by the source file calling the lexer and the source file, which
  711. // instantiates the lex_functor. But it is defined here, so it will be
  712. // instantiated only while compiling the source file, which instantiates the
  713. // lex_functor. While the xlex_interface.hpp file may be included everywhere,
  714. // this file (xlex_lexer.hpp) should be included only once. This allows
  715. // to decouple the lexer interface from the lexer implementation and reduces
  716. // compilation time.
  717. //
  718. ///////////////////////////////////////////////////////////////////////////////
  719. template <typename Iterator, typename Position>
  720. BOOST_WAVE_FLEX_NEW_LEXER_INLINE
  721. wave::cpplexer::lex_input_interface<wave::cpplexer::lex_token<Position> > *
  722. new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
  723. Iterator const &last, Position const &pos, wave::language_support language)
  724. {
  725. return new lexertl_functor<Iterator, Position>(first, last, pos, language);
  726. }
  727. #undef BOOST_WAVE_FLEX_NEW_LEXER_INLINE
  728. ///////////////////////////////////////////////////////////////////////////////
  729. }}}} // namespace boost::wave::cpplexer::lexertl
  730. #endif // !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)