// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use it together with a grammar. The grammar
// backtracks heavily, which makes it a good candidate for lexer based
// parsing: all tokens are scanned and generated only once, even if
// backtracking is required, which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// This example recognizes couplets, which are sequences of numbers enclosed
// in matching pairs of parentheses. See the comments below for details
// and examples.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;
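
        // Note: the character literals '(' and ')' above define single
        // character tokens whose token id equals the character code, which
        // is why the grammar below can match them with plain char literals.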
        // define the whitespace to ignore (spaces, tabs, newlines and
        // C-style comments)
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
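
        // The definitions added to self("WS") live in a separate lexer state
        // named "WS"; they are not matched in the default state, but only when
        // the skip parser (see in_state("WS") in main) switches to that state.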
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, number;
};
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            =  +(couplet | tok.ellipses)
            ;

        // A couplet matches nested pairs of left and right parentheses.
        // For example:
        //   (1) (1 2) (1 2 3) ...
        //   ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //   (((1))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;
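
        // Because the input has already been tokenized, backtracking between
        // the two alternatives above re-reads tokens, not characters, which
        // is where the speedup described at the top of this file comes from.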
        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example3.input"));
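
    // Note: example3.input is assumed to contain whitespace/comment separated
    // couplets and ellipses such as the examples shown in the grammar above,
    // e.g. "(1) (1 2) ... ((1 2)(3 4))".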
    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
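
    // The skip parser in_state("WS")[tokens.self] temporarily switches the
    // lexer into the "WS" state and consumes any token defined there (plain
    // whitespace and C-style comments), so these tokens never reach the
    // grammar rules.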
    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}