word_count_lexer.cpp

// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
/*
//[wcl_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+  { ++w; c += yyleng; }
    \n         { ++c; ++l; }
    .          { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to implement the word count functionality of the UNIX 'wc'
// command: it prints the number of lines, words, and characters in a file.
//
// This example shows how to use semantic actions associated with token
// definitions to directly attach actions to tokens. These are executed
// whenever the corresponding token is matched in the input sequence. Note
// how this example implements all of the functionality directly in the
// lexer definition, without any need for a parser.
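//
// In general, a lexer semantic action is any Phoenix actor (or compatible
// function object) attached to a token definition via operator[]. A minimal
// sketch of the pattern used below ('tok' and 'counter' are hypothetical
// names, for illustration only):
//
//     lex::token_def<> tok("...");
//     std::size_t counter = 0;
//     this->self = tok [ ++boost::phoenix::ref(counter) ];
//
// phoenix::ref() captures 'counter' by reference, so the increment runs
// every time 'tok' matches.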

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>

//[wcl_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"
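// "example.hpp" ships alongside the Spirit examples and provides the
// read_from_file() helper used in main() below.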

//[wcl_namespaces
namespace lex = boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
//
// Note: the token definition class is instantiated with the
// 'lexertl::actor_lexer<>' template below, which is necessary to be able to
// use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
struct distance_func
{
    template <typename Iterator1, typename Iterator2>
    struct result : boost::iterator_difference<Iterator1> {};

    template <typename Iterator1, typename Iterator2>
    typename result<Iterator1, Iterator2>::type
    operator()(Iterator1 const& begin, Iterator2 const& end) const
    {
        return std::distance(begin, end);
    }
};
boost::phoenix::function<distance_func> const distance = distance_func();
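
// Wrapping distance_func in phoenix::function<> turns it into a lazy
// function: an expression such as distance(_start, _end) is not evaluated
// immediately but yields a Phoenix actor that calls std::distance() later,
// when the semantic action fires. The nested 'result' metafunction is the
// return type protocol Phoenix V2 requires of such polymorphic function
// objects.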

//[wcl_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
      : c(0), w(0), l(0)
      , word("[^ \t\n]+")     // define tokens
      , eol("\n")
      , any(".")
    {
        using boost::spirit::lex::_start;
        using boost::spirit::lex::_end;
        using boost::phoenix::ref;

        // associate tokens with the lexer
        this->self
            =   word  [++ref(w), ref(c) += distance(_start, _end)]
            |   eol   [++ref(c), ++ref(l)]
            |   any   [++ref(c)]
            ;
    }

    std::size_t c, w, l;
    lex::token_def<> word, eol, any;
};
//]
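
// Note the one-to-one correspondence with the flex rules quoted at the top:
// 'word' plays the role of [^ \t\n]+ (adding the length of the match to the
// character count, just as 'c += yyleng' does), 'eol' matches \n, and 'any'
// matches any other single character.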

///////////////////////////////////////////////////////////////////////////////
//[wcl_main
int main(int argc, char* argv[])
{
/*< Specifying `omit` as the token attribute type generates a token class
     not holding any token attribute at all (not even the iterator range of
     the matched input sequence), therefore optimizing the token, the lexer,
     and possibly the parser implementation as much as possible. Specifying
     `mpl::false_` as the 3rd template parameter generates a token type and
     an iterator, both holding no lexer state, allowing for even more
     aggressive optimizations. As a result the token instances contain the
     token ids as the only data member.
>*/  typedef
        lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
     token_type;

/*< This defines the lexer type to use
>*/  typedef lex::lexertl::actor_lexer<token_type> lexer_type;

/*< Create the lexer object instance needed to invoke the lexical analysis
>*/  word_count_tokens<lexer_type> word_count_lexer;

/*< Read input from the given file, tokenize all the input, while discarding
     all generated tokens
>*/  std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
     char const* first = str.c_str();
     char const* last = &first[str.size()];

/*< Create a pair of iterators returning the sequence of generated tokens
>*/  lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
     lexer_type::iterator_type end = word_count_lexer.end();

/*< Here we simply iterate over all tokens, making sure to break the loop
     if an invalid token gets returned from the lexer
>*/  while (iter != end && token_is_valid(*iter))
         ++iter;

     if (iter == end) {
         std::cout << "lines: " << word_count_lexer.l
                   << ", words: " << word_count_lexer.w
                   << ", characters: " << word_count_lexer.c
                   << "\n";
     }
     else {
         std::string rest(first, last);
         std::cout << "Lexical analysis failed\n" << "stopped at: \""
                   << rest << "\"\n";
     }
     return 0;
}
//]
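
// Typical usage (assuming the program has been built as 'word_count_lexer'
// and a text file 'word_count.input' exists next to it, as expected by the
// default argument above):
//
//     $ ./word_count_lexer word_count.input
//     lines: <l>, words: <w>, characters: <c>
//
// The exact numbers depend on the contents of the input file.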