example6.cpp

// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language, and how to use this lexer with
// a grammar. It shows how to associate attributes with tokens and how to
// access the token attributes from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be defined
// as ignored.
//
// It also demonstrates how to use the add(...)(...) syntax to associate
// token definitions with the lexer, and how token ids can be used in the
// parser to refer to a token without having to directly reference its
// definition.
//
// This example recognizes a very simple programming language consisting of
// assignment statements and if and while control structures. Look at the file
// example6.input for an example; a sketch of a valid input follows below.
//
// This example is essentially identical to example4.cpp. The only difference
// is that we use the self.add() syntax to define tokens and to associate them
// with the lexer.
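//
// For illustration only (an assumption about the shape of example6.input, not
// a copy of its actual content), a minimal input accepted by this grammar
// could look like:
//
//     {
//         a = 5;
//         if (a) { b = a; }
//         while (b) { b = 0; }
//     }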
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
//  Token id definitions
///////////////////////////////////////////////////////////////////////////////
enum token_ids
{
    ID_CONSTANT = 1000,
    ID_IF,
    ID_ELSE,
    ID_WHILE,
    ID_IDENTIFIER
};
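// Note: the ids start at 1000 so they cannot collide with the ids of the
// single-character tokens defined below; in Spirit.Lex a token_def built from
// a single character uses the character code itself as its default token id.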
///////////////////////////////////////////////////////////////////////////////
//  Token definitions
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example6_tokens : lex::lexer<Lexer>
{
    example6_tokens()
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';';

        // Token definitions can be added using the special add(...)(...)
        // syntax shown below.
        // Note that the token definitions added this way expose the iterator
        // pair pointing to the matched input stream as their attribute.
        this->self.add
            (constant, ID_CONSTANT)
            ("if", ID_IF)
            ("else", ID_ELSE)
            ("while", ID_WHILE)
            (identifier, ID_IDENTIFIER)
        ;

        // define the whitespace to ignore (spaces, tabs, newlines, and C-style
        // comments) and add those to another lexer state (here: "WS")
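        // The comment pattern below is the classic regular expression for a
        // complete C-style comment: "/*", then any sequence of characters
        // that does not close the comment, then "*/".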
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;
    }
    // The following two tokens have an associated attribute type: identifier
    // carries a string (the identifier name) and constant carries the matched
    // integer value.
    //
    // Note: any token attribute type explicitly specified in a token_def<>
    // declaration needs to be listed during token type definition as
    // well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token attributes as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding them being copied around.
    lex::token_def<std::string> identifier;
    lex::token_def<unsigned int> constant;
};
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example6_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example6_grammar(TokenDef const& tok)
      : example6_grammar::base_type(program)
    {
        using boost::spirit::_val;

        program
            =  +block
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ")
                              << _1 << "\n"
                ]
            ;

        if_stmt
            =   (   token(ID_IF) >> '(' >> expression >> ')' >> block
                >> -(token(ID_ELSE) >> block)
                )
                [
                    std::cout << val("if expression: ")
                              << _2 << "\n"
                ]
            ;
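        // In the action above, _2 refers to the attribute exposed by
        // 'expression': the literal '(' and ')' and the attribute-less
        // 'block' rule contribute nothing to the sequence attribute, while
        // token(ID_IF) contributes _1.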
        while_stmt
            =   (token(ID_WHILE) >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ")
                              << _2 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating both
        // std::string and unsigned int, either value may be returned to
        // the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant [ _val = _1 ]
            ;
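        // The assignments to _val work because expression_type (defined
        // below) is a boost::variant that is assignable from either token
        // attribute type.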
    }

    typedef boost::variant<unsigned int, std::string> expression_type;

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;

    // the expression is the only rule having a return value
    qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // example6_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token attribute types in the following declaration
    // (or just use the default token type: lex::lexertl::token<base_iterator_type>)
    // it will compile and work just fine, but a bit less efficiently. This is
    // because the token attribute will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token attribute type you'll have to list all attribute types
    // used for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lex::lexertl::token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;

    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example6_tokens<lexer_type> example6_tokens;

    // this is the iterator type exposed by the lexer
    typedef example6_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example6_grammar<iterator_type, example6_tokens::lexer_def> example6_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example6_tokens tokens;         // Our lexer
    example6_grammar calc(tokens);  // Our parser

    std::string str(read_from_file("example6.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    std::string ws("WS");
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
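
// With the sample input sketched at the top of this file, the semantic actions
// would print one "assignment statement to: ..." line per assignment and one
// "if expression: ..." / "while expression: ..." line per control structure
// (statements inside a block report before the enclosing construct, since an
// action fires only once its whole rule has matched), followed by the
// "Parsing succeeded" banner.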