example4.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. // Copyright (c) 2001-2010 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. // This example shows how to create a simple lexer recognizing a couple of
  6. // different tokens aimed at a simple language and how to use this lexer with
  7. // a grammar. It shows how to associate attributes to tokens and how to access
  8. // the token attributes from inside the grammar.
  9. //
  10. // We use explicit token attribute types, making the corresponding token instances
  11. // convert the matched input into an instance of that type. The token
  12. // attribute is exposed as the parser attribute if this token is used as a
  13. // parser component somewhere in a grammar.
  14. //
  15. // Additionally, this example demonstrates how to define a token set usable
  16. // as the skip parser during parsing, which allows several tokens to be
  17. // ignored.
  18. //
  19. // This example recognizes a very simple programming language having
  20. // assignment statements and if and while control structures. Look at the file
  21. // example4.input for an example.
  22. #include <boost/config/warning_disable.hpp>
  23. #include <boost/spirit/include/qi.hpp>
  24. #include <boost/spirit/include/lex_lexertl.hpp>
  25. #include <boost/spirit/include/phoenix_operator.hpp>
  26. #include <iostream>
  27. #include <fstream>
  28. #include <string>
  29. #include "example.hpp"
  30. using namespace boost::spirit;
  31. using boost::phoenix::val;
  32. ///////////////////////////////////////////////////////////////////////////////
  33. // Token definition
  34. ///////////////////////////////////////////////////////////////////////////////
  35. template <typename Lexer>
  36. struct example4_tokens : lex::lexer<Lexer>
  37. {
  38. example4_tokens()
  39. {
  40. // define the tokens to match
  41. identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
  42. constant = "[0-9]+";
  43. if_ = "if";
  44. else_ = "else";
  45. while_ = "while";
  46. // associate the tokens and the token set with the lexer
  47. this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
  48. this->self += if_ | else_ | while_ | identifier;
  49. // define the whitespace to ignore (spaces, tabs, newlines and C-style
  50. // comments)
  51. this->self("WS")
  52. = lex::token_def<>("[ \\t\\n]+")
  53. | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
  54. ;
  55. }
  56. //[example4_token_def
  57. // these tokens expose the iterator_range of the matched input sequence
  58. lex::token_def<> if_, else_, while_;
  59. // The following two tokens have an associated attribute type, 'identifier'
  60. // carries a string (the identifier name) and 'constant' carries the
  61. // matched integer value.
  62. //
  63. // Note: any token attribute type explicitly specified in a token_def<>
  64. // declaration needs to be listed during token type definition as
  65. // well (see the typedef for the token_type below).
  66. //
  67. // The conversion of the matched input to an instance of this type occurs
  68. // once (on first access), which makes token attributes as efficient as
  69. // possible. Moreover, token instances are constructed once by the lexer
  70. // library. From this point on tokens are passed by reference only,
  71. // avoiding them being copied around.
  72. lex::token_def<std::string> identifier;
  73. lex::token_def<unsigned int> constant;
  74. //]
  75. };
  76. ///////////////////////////////////////////////////////////////////////////////
  77. // Grammar definition
  78. ///////////////////////////////////////////////////////////////////////////////
  79. template <typename Iterator, typename Lexer>
  80. struct example4_grammar
  81. : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
  82. {
  83. template <typename TokenDef>
  84. example4_grammar(TokenDef const& tok)
  85. : example4_grammar::base_type(program)
  86. {
  87. using boost::spirit::_val;
  88. program
  89. = +block
  90. ;
  91. block
  92. = '{' >> *statement >> '}'
  93. ;
  94. statement
  95. = assignment
  96. | if_stmt
  97. | while_stmt
  98. ;
  99. assignment
  100. = (tok.identifier >> '=' >> expression >> ';')
  101. [
  102. std::cout << val("assignment statement to: ") << _1 << "\n"
  103. ]
  104. ;
  105. if_stmt
  106. = ( tok.if_ >> '(' >> expression >> ')' >> block
  107. >> -(tok.else_ >> block)
  108. )
  109. [
  110. std::cout << val("if expression: ") << _2 << "\n"
  111. ]
  112. ;
  113. while_stmt
  114. = (tok.while_ >> '(' >> expression >> ')' >> block)
  115. [
  116. std::cout << val("while expression: ") << _2 << "\n"
  117. ]
  118. ;
  119. // since expression has a variant return type accommodating for
  120. // std::string and unsigned integer, both possible values may be
  121. // returned to the calling rule
  122. expression
  123. = tok.identifier [ _val = _1 ]
  124. | tok.constant [ _val = _1 ]
  125. ;
  126. }
  127. typedef boost::variant<unsigned int, std::string> expression_type;
  128. qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
  129. qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
  130. qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;
  131. // the expression is the only rule having a return value
  132. qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
  133. };
  134. ///////////////////////////////////////////////////////////////////////////////
  135. int main()
  136. {
  137. // iterator type used to expose the underlying input stream
  138. typedef std::string::iterator base_iterator_type;
  139. //[example4_token
  140. // This is the lexer token type to use. The second template parameter lists
  141. // all attribute types used for token_def's during token definition (see
  142. // calculator_tokens<> above). Here we use the predefined lexertl token
  143. // type, but any compatible token type may be used instead.
  144. //
  145. // If you don't list any token attribute types in the following declaration
  146. // (or just use the default token type: lexertl_token<base_iterator_type>)
  147. // it will compile and work just fine, just a bit less efficient. This is
  148. // because the token attribute will be generated from the matched input
  149. // sequence every time it is requested. But as soon as you specify at
  150. // least one token attribute type you'll have to list all attribute types
  151. // used for token_def<> declarations in the token definition class above,
  152. // otherwise compilation errors will occur.
  153. typedef lex::lexertl::token<
  154. base_iterator_type, boost::mpl::vector<unsigned int, std::string>
  155. > token_type;
  156. //]
  157. // Here we use the lexertl based lexer engine.
  158. typedef lex::lexertl::lexer<token_type> lexer_type;
  159. // This is the token definition type (derived from the given lexer type).
  160. typedef example4_tokens<lexer_type> example4_tokens;
  161. // this is the iterator type exposed by the lexer
  162. typedef example4_tokens::iterator_type iterator_type;
  163. // this is the type of the grammar to parse
  164. typedef example4_grammar<iterator_type, example4_tokens::lexer_def> example4_grammar;
  165. // now we use the types defined above to create the lexer and grammar
  166. // object instances needed to invoke the parsing process
  167. example4_tokens tokens; // Our lexer
  168. example4_grammar calc(tokens); // Our parser
  169. std::string str (read_from_file("example4.input"));
  170. // At this point we generate the iterator pair used to expose the
  171. // tokenized input stream.
  172. std::string::iterator it = str.begin();
  173. iterator_type iter = tokens.begin(it, str.end());
  174. iterator_type end = tokens.end();
  175. // Parsing is done based on the token stream, not the character
  176. // stream read from the input.
  177. // Note how we use the lexer defined above as the skip parser. It must
  178. // be explicitly wrapped inside a state directive, switching the lexer
  179. // state for the duration of skipping whitespace.
  180. bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
  181. if (r && iter == end)
  182. {
  183. std::cout << "-------------------------\n";
  184. std::cout << "Parsing succeeded\n";
  185. std::cout << "-------------------------\n";
  186. }
  187. else
  188. {
  189. std::cout << "-------------------------\n";
  190. std::cout << "Parsing failed\n";
  191. std::cout << "-------------------------\n";
  192. }
  193. std::cout << "Bye... :-) \n\n";
  194. return 0;
  195. }