idl.re 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Sample: IDL lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #include <ctime>
  10. #include <cstdlib>
  11. #include <cstdio>
  12. #include <cstring>
  13. #include <sys/types.h>
  14. #include <sys/stat.h>
  15. #include <fcntl.h>
  16. #include <boost/config.hpp>
  17. #if defined(BOOST_HAS_UNISTD_H)
  18. #include <unistd.h>
  19. #else
  20. #include <io.h>
  21. #endif
  22. #include <boost/assert.hpp>
  23. #include <boost/detail/workaround.hpp>
  24. // reuse the token ids and re2c helper functions from the default C++ lexer
  25. #include <boost/wave/token_ids.hpp>
  26. #include <boost/wave/cpplexer/re2clex/aq.hpp>
  27. #include <boost/wave/cpplexer/re2clex/scanner.hpp>
  28. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  29. #include "idl_re.hpp"
  30. #if defined(_MSC_VER) && !defined(__COMO__)
  31. #pragma warning (disable: 4101) // 'foo' : unreferenced local variable
  32. #pragma warning (disable: 4102) // 'foo' : unreferenced label
  33. #endif
  34. #define BOOST_WAVE_BSIZE 196608
  35. #define YYCTYPE uchar
  36. #define YYCURSOR cursor
  37. #define YYLIMIT s->lim
  38. #define YYMARKER s->ptr
  39. #define YYFILL(n) {cursor = fill(s, cursor);}
  40. //#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);}
  41. #define BOOST_WAVE_RET(i) \
  42. { \
  43. s->line += count_backslash_newlines(s, cursor); \
  44. s->cur = cursor; \
  45. return (i); \
  46. } \
  47. /**/
  48. ///////////////////////////////////////////////////////////////////////////////
  49. namespace boost {
  50. namespace wave {
  51. namespace idllexer {
  52. namespace re2clex {
  53. #define RE2C_ASSERT BOOST_ASSERT
  54. int
  55. get_one_char(boost::wave::cpplexer::re2clex::Scanner *s)
  56. {
  57. using namespace boost::wave::cpplexer::re2clex;
  58. if (0 != s->act) {
  59. RE2C_ASSERT(s->first != 0 && s->last != 0);
  60. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  61. if (s->act < s->last)
  62. return *(s->act)++;
  63. }
  64. return -1;
  65. }
  66. std::ptrdiff_t
  67. rewind_stream (boost::wave::cpplexer::re2clex::Scanner *s, int cnt)
  68. {
  69. if (0 != s->act) {
  70. RE2C_ASSERT(s->first != 0 && s->last != 0);
  71. s->act += cnt;
  72. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  73. return s->act - s->first;
  74. }
  75. return 0;
  76. }
  77. std::size_t
  78. get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner* s)
  79. {
  80. if (!AQ_EMPTY(s->eol_offsets))
  81. {
  82. return s->eol_offsets->queue[s->eol_offsets->head];
  83. }
  84. else
  85. {
  86. return (unsigned int)-1;
  87. }
  88. }
  89. void
  90. adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner* s,
  91. std::size_t adjustment)
  92. {
  93. boost::wave::cpplexer::re2clex::aq_queue q;
  94. std::size_t i;
  95. if (!s->eol_offsets)
  96. s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create();
  97. q = s->eol_offsets;
  98. if (AQ_EMPTY(q))
  99. return;
  100. i = q->head;
  101. while (i != q->tail)
  102. {
  103. if (adjustment > q->queue[i])
  104. q->queue[i] = 0;
  105. else
  106. q->queue[i] -= adjustment;
  107. ++i;
  108. if (i == q->max_size)
  109. i = 0;
  110. }
  111. if (adjustment > q->queue[i])
  112. q->queue[i] = 0;
  113. else
  114. q->queue[i] -= adjustment;
  115. }
  116. int
  117. count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner *s,
  118. boost::wave::cpplexer::re2clex::uchar *cursor)
  119. {
  120. using namespace boost::wave::cpplexer::re2clex;
  121. std::size_t diff, offset;
  122. int skipped = 0;
  123. /* figure out how many backslash-newlines skipped over unknowingly. */
  124. diff = cursor - s->bot;
  125. offset = get_first_eol_offset(s);
  126. while (offset <= diff && offset != (unsigned int)-1)
  127. {
  128. skipped++;
  129. boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets);
  130. offset = get_first_eol_offset(s);
  131. }
  132. return skipped;
  133. }
  134. bool is_backslash(
  135. boost::wave::cpplexer::re2clex::uchar *p,
  136. boost::wave::cpplexer::re2clex::uchar *end, int &len)
  137. {
  138. if (*p == '\\') {
  139. len = 1;
  140. return true;
  141. }
  142. else if (*p == '?' && *(p+1) == '?' && (p+2 < end && *(p+2) == '/')) {
  143. len = 3;
  144. return true;
  145. }
  146. return false;
  147. }
  148. boost::wave::cpplexer::re2clex::uchar *
  149. fill(boost::wave::cpplexer::re2clex::Scanner *s,
  150. boost::wave::cpplexer::re2clex::uchar *cursor)
  151. {
  152. using namespace std; // some systems have memcpy etc. in namespace std
  153. using namespace boost::wave::cpplexer::re2clex;
  154. if(!s->eof)
  155. {
  156. uchar* p;
  157. std::ptrdiff_t cnt = s->tok - s->bot;
  158. if(cnt)
  159. {
  160. memcpy(s->bot, s->tok, s->lim - s->tok);
  161. s->tok = s->bot;
  162. s->ptr -= cnt;
  163. cursor -= cnt;
  164. s->lim -= cnt;
  165. adjust_eol_offsets(s, cnt);
  166. }
  167. if((s->top - s->lim) < BOOST_WAVE_BSIZE)
  168. {
  169. uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
  170. if (buf == 0)
  171. {
  172. using namespace std; // some systems have printf in std
  173. if (0 != s->error_proc) {
  174. (*s->error_proc)(s,
  175. cpplexer::lexing_exception::unexpected_error,
  176. "Out of memory!");
  177. }
  178. else
  179. printf("Out of memory!\n");
  180. /* get the scanner to stop */
  181. *cursor = 0;
  182. return cursor;
  183. }
  184. memcpy(buf, s->tok, s->lim - s->tok);
  185. s->tok = buf;
  186. s->ptr = &buf[s->ptr - s->bot];
  187. cursor = &buf[cursor - s->bot];
  188. s->lim = &buf[s->lim - s->bot];
  189. s->top = &s->lim[BOOST_WAVE_BSIZE];
  190. free(s->bot);
  191. s->bot = buf;
  192. }
  193. if (s->act != 0) {
  194. cnt = s->last - s->act;
  195. if (cnt > BOOST_WAVE_BSIZE)
  196. cnt = BOOST_WAVE_BSIZE;
  197. memcpy(s->lim, s->act, cnt);
  198. s->act += cnt;
  199. if (cnt != BOOST_WAVE_BSIZE)
  200. {
  201. s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
  202. }
  203. }
  204. /* backslash-newline erasing time */
  205. /* first scan for backslash-newline and erase them */
  206. for (p = s->lim; p < s->lim + cnt - 2; ++p)
  207. {
  208. int len = 0;
  209. if (is_backslash(p, s->lim + cnt, len))
  210. {
  211. if (*(p+len) == '\n')
  212. {
  213. int offset = len + 1;
  214. memmove(p, p + offset, s->lim + cnt - p - offset);
  215. cnt -= offset;
  216. --p;
  217. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  218. }
  219. else if (*(p+len) == '\r')
  220. {
  221. if (*(p+len+1) == '\n')
  222. {
  223. int offset = len + 2;
  224. memmove(p, p + offset, s->lim + cnt - p - offset);
  225. cnt -= offset;
  226. --p;
  227. }
  228. else
  229. {
  230. int offset = len + 1;
  231. memmove(p, p + offset, s->lim + cnt - p - offset);
  232. cnt -= offset;
  233. --p;
  234. }
  235. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  236. }
  237. }
  238. }
  239. /* FIXME: the following code should be fixed to recognize correctly the
  240. trigraph backslash token */
  241. /* check to see if what we just read ends in a backslash */
  242. if (cnt >= 2)
  243. {
  244. uchar last = s->lim[cnt-1];
  245. uchar last2 = s->lim[cnt-2];
  246. /* check \ EOB */
  247. if (last == '\\')
  248. {
  249. int next = get_one_char(s);
  250. /* check for \ \n or \ \r or \ \r \n straddling the border */
  251. if (next == '\n')
  252. {
  253. --cnt; /* chop the final \, we've already read the \n. */
  254. boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
  255. cnt + (s->lim - s->bot));
  256. }
  257. else if (next == '\r')
  258. {
  259. int next2 = get_one_char(s);
  260. if (next2 == '\n')
  261. {
  262. --cnt; /* skip the backslash */
  263. }
  264. else
  265. {
  266. /* rewind one, and skip one char */
  267. rewind_stream(s, -1);
  268. --cnt;
  269. }
  270. boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
  271. cnt + (s->lim - s->bot));
  272. }
  273. else if (next != -1) /* -1 means end of file */
  274. {
  275. /* next was something else, so rewind the stream */
  276. rewind_stream(s, -1);
  277. }
  278. }
  279. /* check \ \r EOB */
  280. else if (last == '\r' && last2 == '\\')
  281. {
  282. int next = get_one_char(s);
  283. if (next == '\n')
  284. {
  285. cnt -= 2; /* skip the \ \r */
  286. }
  287. else
  288. {
  289. /* rewind one, and skip two chars */
  290. rewind_stream(s, -1);
  291. cnt -= 2;
  292. }
  293. boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
  294. cnt + (s->lim - s->bot));
  295. }
  296. /* check \ \n EOB */
  297. else if (last == '\n' && last2 == '\\')
  298. {
  299. cnt -= 2;
  300. boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
  301. cnt + (s->lim - s->bot));
  302. }
  303. }
  304. s->lim += cnt;
  305. if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
  306. {
  307. s->eof = s->lim;
  308. *(s->eof)++ = '\0';
  309. }
  310. }
  311. return cursor;
  312. }
  313. boost::wave::token_id
  314. scan(boost::wave::cpplexer::re2clex::Scanner *s)
  315. {
  316. using namespace boost::wave::cpplexer::re2clex;
  317. uchar *cursor = s->tok = s->cur;
  318. /*!re2c
  319. re2c:indent:string = " ";
  320. any = [\t\v\f\r\n\040-\377];
  321. anyctrl = [\000-\377];
  322. OctalDigit = [0-7];
  323. Digit = [0-9];
  324. HexDigit = [a-fA-F0-9];
  325. ExponentPart = [Ee] [+-]? Digit+;
  326. FractionalConstant = (Digit* "." Digit+) | (Digit+ ".");
  327. FloatingSuffix = [fF][lL]?|[lL][fF]?;
  328. IntegerSuffix = [uU][lL]?|[lL][uU]?;
  329. FixedPointSuffix = [dD];
  330. Backslash = [\\]|"??/";
  331. EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?);
  332. HexQuad = HexDigit HexDigit HexDigit HexDigit;
  333. UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad);
  334. Newline = "\r\n" | "\n" | "\r";
  335. PPSpace = ([ \t]|("/*"(any\[*]|Newline|("*"+(any\[*/]|Newline)))*"*"+"/"))*;
  336. Pound = "#" | "??=" | "%:";
  337. */
  338. /*!re2c
  339. "/*" { goto ccomment; }
  340. "//" { goto cppcomment; }
  341. "TRUE" { BOOST_WAVE_RET(T_TRUE); }
  342. "FALSE" { BOOST_WAVE_RET(T_FALSE); }
  343. "{" { BOOST_WAVE_RET(T_LEFTBRACE); }
  344. "}" { BOOST_WAVE_RET(T_RIGHTBRACE); }
  345. "[" { BOOST_WAVE_RET(T_LEFTBRACKET); }
  346. "]" { BOOST_WAVE_RET(T_RIGHTBRACKET); }
  347. "#" { BOOST_WAVE_RET(T_POUND); }
  348. "##" { BOOST_WAVE_RET(T_POUND_POUND); }
  349. "(" { BOOST_WAVE_RET(T_LEFTPAREN); }
  350. ")" { BOOST_WAVE_RET(T_RIGHTPAREN); }
  351. ";" { BOOST_WAVE_RET(T_SEMICOLON); }
  352. ":" { BOOST_WAVE_RET(T_COLON); }
  353. "?" { BOOST_WAVE_RET(T_QUESTION_MARK); }
  354. "." { BOOST_WAVE_RET(T_DOT); }
  355. "+" { BOOST_WAVE_RET(T_PLUS); }
  356. "-" { BOOST_WAVE_RET(T_MINUS); }
  357. "*" { BOOST_WAVE_RET(T_STAR); }
  358. "/" { BOOST_WAVE_RET(T_DIVIDE); }
  359. "%" { BOOST_WAVE_RET(T_PERCENT); }
  360. "^" { BOOST_WAVE_RET(T_XOR); }
  361. "&" { BOOST_WAVE_RET(T_AND); }
  362. "|" { BOOST_WAVE_RET(T_OR); }
  363. "~" { BOOST_WAVE_RET(T_COMPL); }
  364. "!" { BOOST_WAVE_RET(T_NOT); }
  365. "=" { BOOST_WAVE_RET(T_ASSIGN); }
  366. "<" { BOOST_WAVE_RET(T_LESS); }
  367. ">" { BOOST_WAVE_RET(T_GREATER); }
  368. "<<" { BOOST_WAVE_RET(T_SHIFTLEFT); }
  369. ">>" { BOOST_WAVE_RET(T_SHIFTRIGHT); }
  370. "==" { BOOST_WAVE_RET(T_EQUAL); }
  371. "!=" { BOOST_WAVE_RET(T_NOTEQUAL); }
  372. "<=" { BOOST_WAVE_RET(T_LESSEQUAL); }
  373. ">=" { BOOST_WAVE_RET(T_GREATEREQUAL); }
  374. "&&" { BOOST_WAVE_RET(T_ANDAND); }
  375. "||" { BOOST_WAVE_RET(T_OROR); }
  376. "++" { BOOST_WAVE_RET(T_PLUSPLUS); }
  377. "--" { BOOST_WAVE_RET(T_MINUSMINUS); }
  378. "," { BOOST_WAVE_RET(T_COMMA); }
  379. ([a-zA-Z_] | UniversalChar) ([a-zA-Z_0-9] | UniversalChar)*
  380. { BOOST_WAVE_RET(T_IDENTIFIER); }
  381. (("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix?
  382. { BOOST_WAVE_RET(T_INTLIT); }
  383. ((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix?
  384. { BOOST_WAVE_RET(T_FLOATLIT); }
  385. (FractionalConstant | Digit+) FixedPointSuffix
  386. { BOOST_WAVE_RET(T_FIXEDPOINTLIT); }
  387. "L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ ['])
  388. { BOOST_WAVE_RET(T_CHARLIT); }
  389. "L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["])
  390. { BOOST_WAVE_RET(T_STRINGLIT); }
  391. Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">"
  392. { BOOST_WAVE_RET(T_PP_HHEADER); }
  393. Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\""
  394. { BOOST_WAVE_RET(T_PP_QHEADER); }
  395. Pound PPSpace "include" PPSpace
  396. { BOOST_WAVE_RET(T_PP_INCLUDE); }
  397. Pound PPSpace "if" { BOOST_WAVE_RET(T_PP_IF); }
  398. Pound PPSpace "ifdef" { BOOST_WAVE_RET(T_PP_IFDEF); }
  399. Pound PPSpace "ifndef" { BOOST_WAVE_RET(T_PP_IFNDEF); }
  400. Pound PPSpace "else" { BOOST_WAVE_RET(T_PP_ELSE); }
  401. Pound PPSpace "elif" { BOOST_WAVE_RET(T_PP_ELIF); }
  402. Pound PPSpace "endif" { BOOST_WAVE_RET(T_PP_ENDIF); }
  403. Pound PPSpace "define" { BOOST_WAVE_RET(T_PP_DEFINE); }
  404. Pound PPSpace "undef" { BOOST_WAVE_RET(T_PP_UNDEF); }
  405. Pound PPSpace "line" { BOOST_WAVE_RET(T_PP_LINE); }
  406. Pound PPSpace "error" { BOOST_WAVE_RET(T_PP_ERROR); }
  407. Pound PPSpace "pragma" { BOOST_WAVE_RET(T_PP_PRAGMA); }
  408. Pound PPSpace "warning" { BOOST_WAVE_RET(T_PP_WARNING); }
  409. [ \t\v\f]+
  410. { BOOST_WAVE_RET(T_SPACE); }
  411. Newline
  412. {
  413. s->line++;
  414. BOOST_WAVE_RET(T_NEWLINE);
  415. }
  416. "\000"
  417. {
  418. if(cursor != s->eof)
  419. {
  420. using namespace std; // some systems have printf in std
  421. if (0 != s->error_proc) {
  422. (*s->error_proc)(s,
  423. cpplexer::lexing_exception::generic_lexing_error,
  424. "'\\000' in input stream");
  425. }
  426. else
  427. printf("Error: 0 in file\n");
  428. }
  429. BOOST_WAVE_RET(T_EOF);
  430. }
  431. anyctrl
  432. {
  433. BOOST_WAVE_RET(TOKEN_FROM_ID(*s->tok, UnknownTokenType));
  434. }
  435. */
  436. ccomment:
  437. /*!re2c
  438. "*/" { BOOST_WAVE_RET(T_CCOMMENT); }
  439. Newline
  440. {
  441. /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF);*/
  442. /*s->tok = cursor; */
  443. s->line += count_backslash_newlines(s, cursor) +1;
  444. goto ccomment;
  445. }
  446. any { goto ccomment; }
  447. "\000"
  448. {
  449. using namespace std; // some systems have printf in std
  450. if(cursor == s->eof)
  451. {
  452. if (s->error_proc)
  453. (*s->error_proc)(s,
  454. cpplexer::lexing_exception::generic_lexing_warning,
  455. "Unterminated comment");
  456. else
  457. printf("Error: Unterminated comment\n");
  458. }
  459. else
  460. {
  461. if (s->error_proc)
  462. (*s->error_proc)(s,
  463. cpplexer::lexing_exception::generic_lexing_error,
  464. "'\\000' in input stream");
  465. else
  466. printf("Error: 0 in file");
  467. }
  468. /* adjust cursor such next call returns T_EOF */
  469. --YYCURSOR;
  470. /* the comment is unterminated, but nevertheless its a comment */
  471. BOOST_WAVE_RET(T_CCOMMENT);
  472. }
  473. anyctrl
  474. {
  475. if (s->error_proc)
  476. (*s->error_proc)(s,
  477. cpplexer::lexing_exception::generic_lexing_error,
  478. "invalid character in input stream");
  479. else
  480. printf("Error: 0 in file");
  481. }
  482. */
  483. cppcomment:
  484. /*!re2c
  485. Newline
  486. {
  487. /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF); */
  488. /*s->tok = cursor; */
  489. s->line++;
  490. BOOST_WAVE_RET(T_CPPCOMMENT);
  491. }
  492. any { goto cppcomment; }
  493. "\000"
  494. {
  495. using namespace std; // some systems have printf in std
  496. if(cursor != s->eof)
  497. {
  498. if (s->error_proc)
  499. (*s->error_proc)(s,
  500. cpplexer::lexing_exception::generic_lexing_error,
  501. "'\\000' in input stream");
  502. else
  503. printf("Error: 0 in file");
  504. }
  505. /* adjust cursor such next call returns T_EOF */
  506. --YYCURSOR;
  507. /* the comment is unterminated, but nevertheless its a comment */
  508. BOOST_WAVE_RET(T_CPPCOMMENT);
  509. }
  510. */
  511. } /* end of scan */
  512. #undef RE2C_ASSERT
  513. ///////////////////////////////////////////////////////////////////////////////
  514. } // namespace re2clex
  515. } // namespace idllexer
  516. } // namespace wave
  517. } // namespace boost