example3.cpp
// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use it with a grammar. This example has a
// heavily backtracking grammar, which makes it a candidate for lexer-based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required), which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// This example recognizes couplets, which are sequences of numbers enclosed
// in matching pairs of parentheses. See the comments below for details
// and examples.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";
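        // Note: the backslashes are doubled for the C++ string literal, so
        // the lexer actually sees the regex \.\.\. (three literal dots) and
        // [0-9]+ (one or more digits).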
        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;
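        // Single characters like '(' and ')' can be added to the lexer
        // directly; Spirit.Lex registers them as tokens whose id is the
        // character code, so the grammar below can refer to them as plain
        // character literals.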
        // define the whitespace to ignore (spaces, tabs, newlines and
        // C-style comments)
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
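        // Tokens added to the "WS" lexer state are matched only while the
        // lexer is in that state; main() below activates that state for
        // skipping via qi::in_state("WS")[tokens.self].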
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, number;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            =  +(couplet | tok.ellipses)
            ;
        // A couplet matches nested left and right parentheses.
        // For example:
        //     (1) (1 2) (1 2 3) ...
        //     ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //     (((1))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;
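        // If the second alternative fails part-way through (for example on
        // unbalanced parentheses), Qi backtracks over the token stream; the
        // tokens themselves are not rescanned from the input, which is the
        // main benefit of lexer-based parsing mentioned in the file header.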
        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example3.input"));
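    // example3.input is assumed to contain couplets in the format described
    // above, e.g. something like:
    //     (1) (1 2) /* a C-style comment */ ((1 2)(3 4)) ...
    // (the actual file contents are not shown here)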
    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();
    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
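    // in_state("WS")[tokens.self] switches the lexer into the "WS" state for
    // the duration of skipping, so the whitespace and comments defined in
    // that state are consumed between the grammar's tokens. The skipper type
    // here must match the qi::in_state_skipper<Lexer> declared in the rules.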
    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}