// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Simple lexer/parser to test the Spirit installation.
//
// This example shows how to create a simple lexer recognizing five different
// tokens, and how to use a single token definition as the skip parser during
// parsing. Additionally, it demonstrates how to use one of the defined
// tokens as a parser component in the grammar.
//
// The grammar recognizes a simple input structure, for instance:
//
//     {
//         hello world, hello it is me
//     }
//
// Any number of simple sentences (optionally comma separated) inside a pair
// of curly braces will be matched.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"
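// (example.hpp accompanies the Spirit examples and provides the
// read_from_file() helper used in main() below.)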

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lex::lexer<Lexer>
{
    example1_tokens()
    {
        // define tokens and associate them with the lexer
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        this->self = lex::char_(',') | '{' | '}' | identifier;

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS')
        this->white_space = "[ \\t\\n]+";
        this->self("WS") = white_space;
    }

    lex::token_def<> identifier, white_space;
};
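
// The five token definitions above are ',', '{', '}', and identifier in the
// default lexer state, plus white_space in the "WS" state. For the sample
// input shown at the top this should produce the token sequence
//
//     '{'  identifier("hello")  identifier("world")  ','  identifier("hello")
//     identifier("it")  identifier("is")  identifier("me")  '}'
//
// while the whitespace in between is matched only once the skip parser has
// switched the lexer into the "WS" state (see phrase_parse() below).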

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<lex::token_def<> > >
{
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
      : example1_grammar::base_type(start)
    {
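        // match a '{', then any number of identifier tokens (each optionally
        // followed by a comma), then a '}'; note how the token_def
        // tok.identifier is used directly as a parser component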
        start = '{' >> *(tok.identifier >> -ascii::char_(',')) >> '}';
    }

    qi::rule<Iterator, qi::in_state_skipper<lex::token_def<> > > start;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // We use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example1_tokens<lexer_type> example1_lex;

    // This is the iterator type exposed by the lexer.
    typedef example1_lex::iterator_type iterator_type;

    // This is the type of the grammar to parse.
    typedef example1_grammar<iterator_type> example1_grammar;
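    // (This local typedef hides the class template of the same name, so from
    // here on example1_grammar names the fully instantiated grammar type.)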

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example1_lex lex;                   // Our lexer
    example1_grammar calc(lex);         // Our grammar definition

    // read the contents of the input file into memory
    std::string str(read_from_file("example1.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[lex.white_space]);
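    // Without the in_state("WS")[...] wrapper the parse would fail at the
    // first whitespace character, since no token definition in the default
    // lexer state matches whitespace.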

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // build the unparsed remainder from the underlying character
        // iterator 'it', which the lexer advanced while producing tokens
        // (constructing a std::string from the token iterators would not
        // compile)
        std::string rest(it, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}