strip_comments_lexer.cpp 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. // Copyright (c) 2001-2010 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. // This example is the equivalent to the following lex program:
  6. //
  7. // %{
  8. // /* INITIAL is the default start state. COMMENT is our new */
  9. // /* state where we remove comments. */
  10. // %}
  11. //
  12. // %s COMMENT
  13. // %%
  14. // <INITIAL>"//".* ;
  15. // <INITIAL>"/*" BEGIN COMMENT;
  16. // <INITIAL>. ECHO;
  17. // <INITIAL>[\n] ECHO;
  18. // <COMMENT>"*/" BEGIN INITIAL;
  19. // <COMMENT>. ;
  20. // <COMMENT>[\n] ;
  21. // %%
  22. //
  23. // main()
  24. // {
  25. // yylex();
  26. // }
  27. //
  28. // Its purpose is to strip comments out of C code.
  29. //
  30. // Additionally this example demonstrates the use of lexer states to structure
  31. // the lexer definition.
  32. // #define BOOST_SPIRIT_LEXERTL_DEBUG
  33. #include <boost/config/warning_disable.hpp>
  34. #include <boost/spirit/include/lex_lexertl.hpp>
  35. #include <boost/spirit/include/phoenix_operator.hpp>
  36. #include <boost/spirit/include/phoenix_statement.hpp>
  37. #include <boost/spirit/include/phoenix_core.hpp>
  38. #include <iostream>
  39. #include <string>
  40. #include "example.hpp"
  41. using namespace boost::spirit;
  42. ///////////////////////////////////////////////////////////////////////////////
  43. // Token definition: We use the lexertl based lexer engine as the underlying
  44. // lexer type.
  45. ///////////////////////////////////////////////////////////////////////////////
  46. enum tokenids
  47. {
  48. IDANY = lex::min_token_id + 10,
  49. IDEOL = lex::min_token_id + 11
  50. };
  51. ///////////////////////////////////////////////////////////////////////////////
  52. // Simple custom semantic action function object used to print the matched
  53. // input sequence for a particular token
  54. template <typename Char, typename Traits>
  55. struct echo_input_functor
  56. {
  57. echo_input_functor (std::basic_ostream<Char, Traits>& os_)
  58. : os(os_) {}
  59. // This is called by the semantic action handling code during the lexing
  60. template <typename Iterator, typename Context>
  61. void operator()(Iterator const& b, Iterator const& e
  62. , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
  63. , std::size_t&, Context&) const
  64. {
  65. os << std::string(b, e);
  66. }
  67. std::basic_ostream<Char, Traits>& os;
  68. };
  69. template <typename Char, typename Traits>
  70. inline echo_input_functor<Char, Traits>
  71. echo_input(std::basic_ostream<Char, Traits>& os)
  72. {
  73. return echo_input_functor<Char, Traits>(os);
  74. }
  75. ///////////////////////////////////////////////////////////////////////////////
  76. // Another simple custom semantic action function object used to switch the
  77. // state of the lexer
  78. struct set_lexer_state
  79. {
  80. set_lexer_state(char const* state_)
  81. : state(state_) {}
  82. // This is called by the semantic action handling code during the lexing
  83. template <typename Iterator, typename Context>
  84. void operator()(Iterator const&, Iterator const&
  85. , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
  86. , std::size_t&, Context& ctx) const
  87. {
  88. ctx.set_state_name(state.c_str());
  89. }
  90. std::string state;
  91. };
  92. ///////////////////////////////////////////////////////////////////////////////
  93. template <typename Lexer>
  94. struct strip_comments_tokens : lex::lexer<Lexer>
  95. {
  96. strip_comments_tokens()
  97. : strip_comments_tokens::base_type(lex::match_flags::match_default)
  98. {
  99. // define tokens and associate them with the lexer
  100. cppcomment = "\"//\"[^\n]*"; // '//[^\n]*'
  101. ccomment = "\"/*\""; // '/*'
  102. endcomment = "\"*/\""; // '*/'
  103. any = std::string(".");
  104. eol = "\n";
  105. // The following tokens are associated with the default lexer state
  106. // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
  107. // strictly optional.
  108. this->self
  109. = cppcomment
  110. | ccomment [ set_lexer_state("COMMENT") ]
  111. | eol [ echo_input(std::cout) ]
  112. | any [ echo_input(std::cout) ]
  113. ;
  114. // The following tokens are associated with the lexer state 'COMMENT'.
  115. this->self("COMMENT")
  116. = endcomment [ set_lexer_state("INITIAL") ]
  117. | "\n"
  118. | std::string(".")
  119. ;
  120. }
  121. lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
  122. };
  123. ///////////////////////////////////////////////////////////////////////////////
  124. int main(int argc, char* argv[])
  125. {
  126. // iterator type used to expose the underlying input stream
  127. typedef std::string::iterator base_iterator_type;
  128. // lexer type
  129. typedef
  130. lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
  131. lexer_type;
  132. // now we use the types defined above to create the lexer and grammar
  133. // object instances needed to invoke the parsing process
  134. strip_comments_tokens<lexer_type> strip_comments; // Our lexer
  135. // No parsing is done alltogether, everything happens in the lexer semantic
  136. // actions.
  137. std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
  138. base_iterator_type first = str.begin();
  139. bool r = lex::tokenize(first, str.end(), strip_comments);
  140. if (!r) {
  141. std::string rest(first, str.end());
  142. std::cerr << "Lexical analysis failed\n" << "stopped at: \""
  143. << rest << "\"\n";
  144. }
  145. return 0;
  146. }