// example2.cpp
  1. // Copyright (c) 2001-2010 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. // This example shows how to create a simple lexer recognizing a couple of
  6. // different tokens and how to use this with a grammar. This example has a
  7. // heavily backtracking grammar which makes it a candidate for lexer based
  8. // parsing (all tokens are scanned and generated only once, even if
  9. // backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
  11. // Additionally it demonstrates how to use one of the defined tokens as a
  12. // parser component in the grammar.
  13. //
  14. // The grammar recognizes a simple input structure: any number of English
  15. // simple sentences (statements, questions and commands) are recognized and
  16. // are being counted separately.
  17. // #define BOOST_SPIRIT_DEBUG
  18. // #define BOOST_SPIRIT_LEXERTL_DEBUG
  19. #include <boost/config/warning_disable.hpp>
  20. #include <boost/spirit/include/qi.hpp>
  21. #include <boost/spirit/include/lex_lexertl.hpp>
  22. #include <boost/spirit/include/phoenix_operator.hpp>
  23. #include <iostream>
  24. #include <fstream>
  25. #include <string>
  26. #include "example.hpp"
  27. using namespace boost::spirit;
  28. using namespace boost::spirit::ascii;
  29. using boost::phoenix::ref;
  30. ///////////////////////////////////////////////////////////////////////////////
  31. // Token definition
  32. ///////////////////////////////////////////////////////////////////////////////
// Token definitions for this example: a 'word' token plus a handful of
// single-character literal tokens, all registered with a lexertl-based lexer.
template <typename Lexer>
struct example2_tokens : lex::lexer<Lexer>
{
    example2_tokens()
    {
        // A 'word' is comprised of one or more letters and an optional
        // apostrophe. If it contains an apostrophe, there may only be one and
        // the apostrophe must be preceded and succeeded by at least 1 letter.
        // For example, "I'm" and "doesn't" meet the definition of 'word' we
        // define below.
        word = "[a-zA-Z]+('[a-zA-Z]+)?";

        // Associate the tokens and the token set with the lexer. Note that
        // single character token definitions as used below always get
        // interpreted literally and never as special regex characters. This is
        // done to be able to assign single characters the id of their character
        // code value, allowing to reference those as literals in Qi grammars.
        this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
    }

    // The 'word' token definition is public so the grammar below can use it
    // directly as a parser component (referenced as 'tok.word').
    lex::token_def<> word;
};
  53. ///////////////////////////////////////////////////////////////////////////////
  54. // Grammar definition
  55. ///////////////////////////////////////////////////////////////////////////////
// Grammar recognizing any number of simple English sentences (commands,
// questions and statements) grouped into newline-separated paragraphs, and
// counting each kind separately.
template <typename Iterator>
struct example2_grammar : qi::grammar<Iterator>
{
    // The constructor receives the token definitions so the grammar can use
    // the 'word' token (tok.word) as a parser component.
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : example2_grammar::base_type(story)
      , paragraphs(0), commands(0), questions(0), statements(0)
    {
        // A story is one or more paragraphs.
        story
            =  +paragraph
            ;

        // A paragraph is one or more sentences followed by optional trailing
        // spaces and at least one newline. Each recognized sentence increments
        // the matching counter through a Phoenix semantic action; a matched
        // paragraph increments the paragraph counter.
        paragraph
            =  (   +(   command [ ++ref(commands) ]
                    |   question [ ++ref(questions) ]
                    |   statement [ ++ref(statements) ]
                    )
                >> *char_(' ') >> +char_('\n')
               )
               [ ++ref(paragraphs) ]
            ;

        // The three sentence kinds share the same body (words, spaces and
        // commas) and differ only in their terminating punctuation — this is
        // what makes the grammar heavily backtracking.
        command
            =  +(tok.word | ' ' | ',') >> '!'
            ;

        question
            =  +(tok.word | ' ' | ',') >> '?'
            ;

        statement
            =  +(tok.word | ' ' | ',') >> '.'
            ;

        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    qi::rule<Iterator> story, paragraph, command, question, statement;

    // Counters updated by the semantic actions above. NOTE(review):
    // 'paragraphs' is counted but never printed by main() — presumably
    // intentional for this example.
    int paragraphs, commands, questions, statements;
};
  94. ///////////////////////////////////////////////////////////////////////////////
  95. int main()
  96. {
  97. // iterator type used to expose the underlying input stream
  98. typedef std::string::iterator base_iterator_type;
  99. // This is the token type to return from the lexer iterator
  100. typedef lex::lexertl::token<base_iterator_type> token_type;
  101. // This is the lexer type to use to tokenize the input.
  102. // Here we use the lexertl based lexer engine.
  103. typedef lex::lexertl::lexer<token_type> lexer_type;
  104. // This is the token definition type (derived from the given lexer type).
  105. typedef example2_tokens<lexer_type> example2_tokens;
  106. // this is the iterator type exposed by the lexer
  107. typedef example2_tokens::iterator_type iterator_type;
  108. // this is the type of the grammar to parse
  109. typedef example2_grammar<iterator_type> example2_grammar;
  110. // now we use the types defined above to create the lexer and grammar
  111. // object instances needed to invoke the parsing process
  112. example2_tokens tokens; // Our lexer
  113. example2_grammar calc(tokens); // Our parser
  114. std::string str (read_from_file("example2.input"));
  115. // At this point we generate the iterator pair used to expose the
  116. // tokenized input stream.
  117. std::string::iterator it = str.begin();
  118. iterator_type iter = tokens.begin(it, str.end());
  119. iterator_type end = tokens.end();
  120. // Parsing is done based on the token stream, not the character
  121. // stream read from the input.
  122. bool r = qi::parse(iter, end, calc);
  123. if (r && iter == end)
  124. {
  125. std::cout << "-------------------------\n";
  126. std::cout << "Parsing succeeded\n";
  127. std::cout << "There were "
  128. << calc.commands << " commands, "
  129. << calc.questions << " questions, and "
  130. << calc.statements << " statements.\n";
  131. std::cout << "-------------------------\n";
  132. }
  133. else
  134. {
  135. std::cout << "-------------------------\n";
  136. std::cout << "Parsing failed\n";
  137. std::cout << "-------------------------\n";
  138. }
  139. std::cout << "Bye... :-) \n\n";
  140. return 0;
  141. }