//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  Simple lexer/parser to test the Spirit installation.
//
//  This example shows how to create a simple lexer recognizing 5 different
//  tokens, and how to use a single token definition as the skip parser during
//  parsing. Additionally, it demonstrates how to use one of the defined
//  tokens as a parser component in the grammar.
//
//  The grammar recognizes a simple input structure, for instance:
//
//      {
//          hello world, hello it is me
//      }
//
//  Any number of simple sentences (optionally comma separated) inside a pair
//  of curly braces will be matched.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lex::lexer<Lexer>
{
    example1_tokens()
    {
        // define tokens and associate them with the lexer
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        this->self = lex::char_(',') | '{' | '}' | identifier;
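
        // Note: the character literals ',', '{' and '}' are registered as
        // single-character tokens here; their token ids correspond to the
        // character codes, which is what allows the grammar below to match
        // them with plain character literals.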

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS')
        this->white_space = "[ \\t\\n]+";
        this->self("WS") = white_space;
    }

    // Both tokens are declared without an explicit attribute type, so they
    // expose no attribute to the parser.
    lex::token_def<> identifier, white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<lex::token_def<> > >
{
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
      : example1_grammar::base_type(start)
    {
        start = '{' >> *(tok.identifier >> -ascii::char_(',')) >> '}';
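
        // '{' and '}' match the corresponding single-character tokens defined
        // in the lexer, while tok.identifier uses the identifier token_def
        // directly as a parser component; the trailing comma is optional.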
    }

    qi::rule<Iterator, qi::in_state_skipper<lex::token_def<> > > start;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // We use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type, derived from the lexer type above.
    typedef example1_tokens<lexer_type> example1_lex;

    // This is the iterator type exposed by the lexer
    typedef example1_lex::iterator_type iterator_type;

    // This is the type of the grammar to parse with
    typedef example1_grammar<iterator_type> example1_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example1_lex lex;                        // Our lexer
    example1_grammar calc(lex);              // Our grammar definition

    std::string str(read_from_file("example1.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();
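
    // Tokenization happens on demand: advancing the token iterator pulls
    // characters from the underlying string and produces the next token.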

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[lex.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // Note: iter and end are token iterators, not character iterators,
        // so the remaining input can't simply be turned into a std::string
        // to report where parsing stopped.
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }
- std::cout << "Bye... :-) \n\n";
- return 0;
- }
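
// Assuming "example1.input" contains the sample input shown at the top of
// this file, a run is expected to print:
//
//     -------------------------
//     Parsing succeeded
//     -------------------------
//
//     Bye... :-)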