idl_re2c_lexer.hpp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Re2C based IDL lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
  10. #define IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
  11. #include <string>
  12. #include <cstdio>
  13. #include <cstdarg>
  14. #if defined(BOOST_SPIRIT_DEBUG)
  15. #include <iostream>
  16. #endif // defined(BOOST_SPIRIT_DEBUG)
  17. #include <boost/concept_check.hpp>
  18. #include <boost/assert.hpp>
  19. #include <boost/spirit/include/classic_core.hpp>
  20. #include <boost/wave/token_ids.hpp>
  21. #include <boost/wave/language_support.hpp>
  22. #include <boost/wave/util/file_position.hpp>
  23. #include <boost/wave/cpplexer/validate_universal_char.hpp>
  24. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  25. // reuse the default token type and re2c lexer helpers
  26. #include <boost/wave/cpplexer/cpp_lex_token.hpp>
  27. #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
  28. #include <boost/wave/cpplexer/re2clex/scanner.hpp>
  29. #include "idl_re.hpp"
  30. ///////////////////////////////////////////////////////////////////////////////
  31. namespace boost {
  32. namespace wave {
  33. namespace idllexer {
  34. namespace re2clex {
  35. ///////////////////////////////////////////////////////////////////////////////
  36. //
  37. // encapsulation of the re2c based idl lexer
  38. //
  39. ///////////////////////////////////////////////////////////////////////////////
  40. template <
  41. typename IteratorT,
  42. typename PositionT = boost::wave::util::file_position_type
  43. >
  44. class lexer
  45. {
  46. typedef boost::wave::cpplexer::re2clex::Scanner scanner_t;
  47. public:
  48. typedef char char_t;
  49. typedef boost::wave::cpplexer::re2clex::Scanner base_t;
  50. typedef boost::wave::cpplexer::lex_token<PositionT> token_type;
  51. typedef typename token_type::string_type string_type;
  52. lexer(IteratorT const &first, IteratorT const &last,
  53. PositionT const &pos, boost::wave::language_support language);
  54. ~lexer();
  55. token_type& get(token_type& t);
  56. void set_position(PositionT const &pos)
  57. {
  58. // set position has to change the file name and line number only
  59. filename = pos.get_file();
  60. scanner.line = pos.get_line();
  61. scanner.file_name = filename.c_str();
  62. }
  63. // error reporting from the re2c generated lexer
  64. static int report_error(scanner_t const *s, int code, char const *, ...);
  65. private:
  66. static char const *tok_names[];
  67. scanner_t scanner;
  68. string_type filename;
  69. bool at_eof;
  70. boost::wave::language_support language;
  71. };
  72. ///////////////////////////////////////////////////////////////////////////////
  73. // initialize the idl lexer
  74. template <typename IteratorT, typename PositionT>
  75. inline
  76. lexer<IteratorT, PositionT>::lexer(IteratorT const &first,
  77. IteratorT const &last, PositionT const &pos,
  78. boost::wave::language_support language)
  79. : filename(pos.get_file()), at_eof(false), language(language)
  80. {
  81. using namespace std; // some systems have memset in std
  82. using namespace boost::wave::cpplexer::re2clex;
  83. memset(&scanner, '\0', sizeof(scanner_t));
  84. scanner.eol_offsets = aq_create();
  85. scanner.first = scanner.act = (uchar *)&(*first);
  86. scanner.last = scanner.first + std::distance(first, last);
  87. scanner.line = pos.get_line();
  88. scanner.error_proc = report_error;
  89. scanner.file_name = filename.c_str();
  90. // not used by the lexer
  91. scanner.enable_ms_extensions = 0;
  92. scanner.act_in_c99_mode = 0;
  93. boost::ignore_unused_variable_warning(language);
  94. }
  95. template <typename IteratorT, typename PositionT>
  96. inline
  97. lexer<IteratorT, PositionT>::~lexer()
  98. {
  99. boost::wave::cpplexer::re2clex::aq_terminate(scanner.eol_offsets);
  100. free(scanner.bot);
  101. }
  102. ///////////////////////////////////////////////////////////////////////////////
  103. // get the next token from the input stream
  104. template <typename IteratorT, typename PositionT>
  105. inline boost::wave::cpplexer::lex_token<PositionT>&
  106. lexer<IteratorT, PositionT>::get(boost::wave::cpplexer::lex_token<PositionT>& t)
  107. {
  108. using namespace boost::wave; // to import token ids to this scope
  109. if (at_eof)
  110. return t = boost::wave::cpplexer::lex_token<PositionT>(); // return T_EOI
  111. token_id id = token_id(scan(&scanner));
  112. string_type value((char const *)scanner.tok, scanner.cur-scanner.tok);
  113. if (T_IDENTIFIER == id) {
  114. // test identifier characters for validity (throws if invalid chars found)
  115. if (!boost::wave::need_no_character_validation(language)) {
  116. boost::wave::cpplexer::impl::validate_identifier_name(value,
  117. scanner.line, -1, filename);
  118. }
  119. }
  120. else if (T_STRINGLIT == id || T_CHARLIT == id) {
  121. // test literal characters for validity (throws if invalid chars found)
  122. if (!boost::wave::need_no_character_validation(language)) {
  123. boost::wave::cpplexer::impl::validate_literal(value, scanner.line,
  124. -1, filename);
  125. }
  126. }
  127. else if (T_EOF == id) {
  128. // T_EOF is returned as a valid token, the next call will return T_EOI,
  129. // i.e. the actual end of input
  130. at_eof = true;
  131. value.clear();
  132. }
  133. return t = boost::wave::cpplexer::lex_token<PositionT>(id, value,
  134. PositionT(filename, scanner.line, -1));
  135. }
  136. template <typename IteratorT, typename PositionT>
  137. inline int
  138. lexer<IteratorT, PositionT>::report_error(scanner_t const *s, int errcode,
  139. char const* msg, ...)
  140. {
  141. BOOST_ASSERT(0 != s);
  142. BOOST_ASSERT(0 != msg);
  143. using namespace std; // some system have vsprintf in namespace std
  144. char buffer[200]; // should be large enough
  145. va_list params;
  146. va_start(params, msg);
  147. vsprintf(buffer, msg, params);
  148. va_end(params);
  149. BOOST_WAVE_LEXER_THROW_VAR(boost::wave::cpplexer::lexing_exception,
  150. errcode, buffer, s->line, -1, s->file_name);
  151. return 0;
  152. }
  153. ///////////////////////////////////////////////////////////////////////////////
  154. //
  155. // lex_functor
  156. //
  157. ///////////////////////////////////////////////////////////////////////////////
  158. template <
  159. typename IteratorT,
  160. typename PositionT = boost::wave::util::file_position_type
  161. >
  162. class lex_functor
  163. : public lex_input_interface_generator<
  164. typename lexer<IteratorT, PositionT>::token_type
  165. >
  166. {
  167. public:
  168. typedef typename lexer<IteratorT, PositionT>::token_type token_type;
  169. lex_functor(IteratorT const &first, IteratorT const &last,
  170. PositionT const &pos, boost::wave::language_support language)
  171. : lexer(first, last, pos, language)
  172. {}
  173. virtual ~lex_functor() {}
  174. // get the next token from the input stream
  175. token_type& get(token_type& t) { return lexer.get(t); }
  176. void set_position(PositionT const &pos)
  177. { lexer.set_position(pos); }
  178. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  179. bool has_include_guards(std::string&) const { return false; }
  180. #endif
  181. private:
  182. lexer<IteratorT, PositionT> lexer;
  183. };
  184. } // namespace re2clex
  185. ///////////////////////////////////////////////////////////////////////////////
  186. //
  187. // The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
  188. // should be defined inline, if the lex_functor shouldn't be instantiated
  189. // separately from the lex_iterator.
  190. //
  191. // Separate (explicit) instantiation helps to reduce compilation time.
  192. //
  193. ///////////////////////////////////////////////////////////////////////////////
  194. #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
  195. #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
  196. #else
  197. #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
  198. #endif
  199. ///////////////////////////////////////////////////////////////////////////////
  200. //
  201. // The 'new_lexer' function allows the opaque generation of a new lexer object.
  202. // It is coupled to the iterator type to allow to decouple the lexer/iterator
  203. // configurations at compile time.
  204. //
  205. // This function is declared inside the cpp_slex_token.hpp file, which is
  206. // referenced by the source file calling the lexer and the source file, which
  207. // instantiates the lex_functor. But it is defined here, so it will be
  208. // instantiated only while compiling the source file, which instantiates the
  209. // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
  210. // this file (idl_re2c_lexer.hpp) should be included only once. This allows
  211. // to decouple the lexer interface from the lexer implementation and reduces
  212. // compilation time.
  213. //
  214. ///////////////////////////////////////////////////////////////////////////////
  215. template <typename IteratorT, typename PositionT>
  216. BOOST_WAVE_RE2C_NEW_LEXER_INLINE
  217. cpplexer::lex_input_interface<cpplexer::lex_token<PositionT> > *
  218. new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
  219. IteratorT const &last, PositionT const &pos,
  220. wave::language_support language)
  221. {
  222. return new re2clex::lex_functor<IteratorT, PositionT>(first, last, pos,
  223. language);
  224. }
  225. #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
  226. ///////////////////////////////////////////////////////////////////////////////
  227. } // namespace idllexer
  228. } // namespace wave
  229. } // namespace boost
  230. #endif // !defined(IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)