// example5.cpp
  1. // Copyright (c) 2001-2010 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. // This example shows how to create a simple lexer recognizing a couple of
  6. // different tokens aimed at a simple language and how to use this lexer with
  7. // a grammar. It shows how to associate attributes to tokens and how to access the
  8. // token attributes from inside the grammar.
  9. //
  10. // Additionally, this example demonstrates, how to define a token set usable
  11. // as the skip parser during parsing, allowing to define several tokens to be
  12. // ignored.
  13. //
  14. // The main purpose of this example is to show how inheritance can be used to
  15. // overload parts of a base grammar and add token definitions to a base lexer.
  16. //
  17. // Further, it shows how you can use the 'omit' attribute type specifier
  18. // for token definitions to force the token to have no attribute (expose an
  19. // unused attribute).
  20. //
  21. // This example recognizes a very simple programming language having
  22. // assignment statements and if and while control structures. Look at the file
  23. // example5.input for an example.
  24. #include <boost/config/warning_disable.hpp>
  25. #include <boost/spirit/include/qi.hpp>
  26. #include <boost/spirit/include/lex_lexertl.hpp>
  27. #include <boost/spirit/include/phoenix_operator.hpp>
  28. #include <iostream>
  29. #include <fstream>
  30. #include <string>
  31. #include "example.hpp"
  32. using namespace boost::spirit;
  33. using boost::phoenix::val;
  34. ///////////////////////////////////////////////////////////////////////////////
  35. // Token definition base, defines all tokens for the base grammar below
  36. ///////////////////////////////////////////////////////////////////////////////
  37. template <typename Lexer>
  38. struct example5_base_tokens : lex::lexer<Lexer>
  39. {
  40. protected:
  41. // this lexer is supposed to be used as a base type only
  42. example5_base_tokens() {}
  43. public:
  44. void init_token_definitions()
  45. {
  46. // define the tokens to match
  47. identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
  48. constant = "[0-9]+";
  49. if_ = "if";
  50. while_ = "while";
  51. // associate the tokens and the token set with the lexer
  52. this->self += lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
  53. this->self += if_ | while_ | identifier;
  54. // define the whitespace to ignore (spaces, tabs, newlines and C-style
  55. // comments)
  56. this->self("WS")
  57. = lex::token_def<>("[ \\t\\n]+")
  58. | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
  59. ;
  60. }
  61. // these tokens have no attribute
  62. lex::token_def<lex::omit> if_, while_;
  63. // The following two tokens have an associated attribute type, 'identifier'
  64. // carries a string (the identifier name) and 'constant' carries the
  65. // matched integer value.
  66. //
  67. // Note: any token attribute type explicitly specified in a token_def<>
  68. // declaration needs to be listed during token type definition as
  69. // well (see the typedef for the token_type below).
  70. //
  71. // The conversion of the matched input to an instance of this type occurs
  72. // once (on first access), which makes token attributes as efficient as
  73. // possible. Moreover, token instances are constructed once by the lexer
  74. // library. From this point on tokens are passed by reference only,
  75. // avoiding them being copied around.
  76. lex::token_def<std::string> identifier;
  77. lex::token_def<unsigned int> constant;
  78. };
  79. ///////////////////////////////////////////////////////////////////////////////
  80. // Grammar definition base, defines a basic language
  81. ///////////////////////////////////////////////////////////////////////////////
  82. template <typename Iterator, typename Lexer>
  83. struct example5_base_grammar
  84. : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
  85. {
  86. template <typename TokenDef>
  87. example5_base_grammar(TokenDef const& tok)
  88. : example5_base_grammar::base_type(program)
  89. {
  90. using boost::spirit::_val;
  91. program
  92. = +block
  93. ;
  94. block
  95. = '{' >> *statement >> '}'
  96. ;
  97. statement
  98. = assignment
  99. | if_stmt
  100. | while_stmt
  101. ;
  102. assignment
  103. = (tok.identifier >> '=' >> expression >> ';')
  104. [
  105. std::cout << val("assignment statement to: ") << _1 << "\n"
  106. ]
  107. ;
  108. if_stmt
  109. = (tok.if_ >> '(' >> expression >> ')' >> block)
  110. [
  111. std::cout << val("if expression: ") << _1 << "\n"
  112. ]
  113. ;
  114. while_stmt
  115. = (tok.while_ >> '(' >> expression >> ')' >> block)
  116. [
  117. std::cout << val("while expression: ") << _1 << "\n"
  118. ]
  119. ;
  120. // since expression has a variant return type accommodating for
  121. // std::string and unsigned integer, both possible values may be
  122. // returned to the calling rule
  123. expression
  124. = tok.identifier [ _val = _1 ]
  125. | tok.constant [ _val = _1 ]
  126. ;
  127. }
  128. typedef qi::in_state_skipper<Lexer> skipper_type;
  129. qi::rule<Iterator, skipper_type> program, block, statement;
  130. qi::rule<Iterator, skipper_type> assignment, if_stmt;
  131. qi::rule<Iterator, skipper_type> while_stmt;
  132. // the expression is the only rule having a return value
  133. typedef boost::variant<unsigned int, std::string> expression_type;
  134. qi::rule<Iterator, expression_type(), skipper_type> expression;
  135. };
  136. ///////////////////////////////////////////////////////////////////////////////
  137. // Token definition for derived lexer, defines additional tokens
  138. ///////////////////////////////////////////////////////////////////////////////
  139. template <typename Lexer>
  140. struct example5_tokens : example5_base_tokens<Lexer>
  141. {
  142. typedef example5_base_tokens<Lexer> base_type;
  143. example5_tokens()
  144. {
  145. // define the additional token to match
  146. else_ = "else";
  147. // associate the new token with the lexer, note we add 'else' before
  148. // anything else to add it to the token set before the identifier
  149. // token, otherwise "else" would be matched as an identifier
  150. this->self = else_;
  151. // now add the token definitions from the base class
  152. this->base_type::init_token_definitions();
  153. }
  154. // this token has no attribute
  155. lex::token_def<lex::omit> else_;
  156. };
  157. ///////////////////////////////////////////////////////////////////////////////
  158. // Derived grammar definition, defines a language extension
  159. ///////////////////////////////////////////////////////////////////////////////
  160. template <typename Iterator, typename Lexer>
  161. struct example5_grammar : example5_base_grammar<Iterator, Lexer>
  162. {
  163. template <typename TokenDef>
  164. example5_grammar(TokenDef const& tok)
  165. : example5_base_grammar<Iterator, Lexer>(tok)
  166. {
  167. // we alter the if_stmt only
  168. this->if_stmt
  169. = this->if_stmt.copy() >> -(tok.else_ >> this->block)
  170. ;
  171. }
  172. };
  173. ///////////////////////////////////////////////////////////////////////////////
  174. int main()
  175. {
  176. // iterator type used to expose the underlying input stream
  177. typedef std::string::iterator base_iterator_type;
  178. // This is the lexer token type to use. The second template parameter lists
  179. // all attribute types used for token_def's during token definition (see
  180. // example5_base_tokens<> above). Here we use the predefined lexertl token
  181. // type, but any compatible token type may be used instead.
  182. //
  183. // If you don't list any token attribute types in the following declaration
  184. // (or just use the default token type: lexertl_token<base_iterator_type>)
  185. // it will compile and work just fine, just a bit less efficient. This is
  186. // because the token attribute will be generated from the matched input
  187. // sequence every time it is requested. But as soon as you specify at
  188. // least one token attribute type you'll have to list all attribute types
  189. // used for token_def<> declarations in the token definition class above,
  190. // otherwise compilation errors will occur.
  191. typedef lex::lexertl::token<
  192. base_iterator_type, boost::mpl::vector<unsigned int, std::string>
  193. > token_type;
  194. // Here we use the lexertl based lexer engine.
  195. typedef lex::lexertl::lexer<token_type> lexer_type;
  196. // This is the token definition type (derived from the given lexer type).
  197. typedef example5_tokens<lexer_type> example5_tokens;
  198. // this is the iterator type exposed by the lexer
  199. typedef example5_tokens::iterator_type iterator_type;
  200. // this is the type of the grammar to parse
  201. typedef example5_grammar<iterator_type, example5_tokens::lexer_def> example5_grammar;
  202. // now we use the types defined above to create the lexer and grammar
  203. // object instances needed to invoke the parsing process
  204. example5_tokens tokens; // Our lexer
  205. example5_grammar calc(tokens); // Our parser
  206. std::string str (read_from_file("example5.input"));
  207. // At this point we generate the iterator pair used to expose the
  208. // tokenized input stream.
  209. std::string::iterator it = str.begin();
  210. iterator_type iter = tokens.begin(it, str.end());
  211. iterator_type end = tokens.end();
  212. // Parsing is done based on the token stream, not the character
  213. // stream read from the input.
  214. // Note how we use the lexer defined above as the skip parser. It must
  215. // be explicitly wrapped inside a state directive, switching the lexer
  216. // state for the duration of skipping whitespace.
  217. std::string ws("WS");
  218. bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]);
  219. if (r && iter == end)
  220. {
  221. std::cout << "-------------------------\n";
  222. std::cout << "Parsing succeeded\n";
  223. std::cout << "-------------------------\n";
  224. }
  225. else
  226. {
  227. std::cout << "-------------------------\n";
  228. std::cout << "Parsing failed\n";
  229. std::cout << "-------------------------\n";
  230. }
  231. std::cout << "Bye... :-) \n\n";
  232. return 0;
  233. }