// example2.cpp
  1. // Copyright (c) 2001-2010 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. // This example shows how to create a simple lexer recognizing a couple of
  6. // different tokens and how to use this with a grammar. This example has a
  7. // heavily backtracking grammar which makes it a candidate for lexer based
  8. // parsing (all tokens are scanned and generated only once, even if
  9. // backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
  11. // Additionally it demonstrates how to use one of the defined tokens as a
  12. // parser component in the grammar.
  13. //
  14. // The grammar recognizes a simple input structure: any number of English
  15. // simple sentences (statements, questions and commands) are recognized and
  16. // are being counted separately.
  17. // #define BOOST_SPIRIT_DEBUG
  18. // #define BOOST_SPIRIT_LEXERTL_DEBUG
  19. #include <boost/config/warning_disable.hpp>
  20. #include <boost/spirit/include/qi.hpp>
  21. #include <boost/spirit/include/lex_lexertl.hpp>
  22. #include <boost/spirit/include/phoenix_operator.hpp>
  23. #include <iostream>
  24. #include <fstream>
  25. #include <string>
  26. #include "example.hpp"
  27. using namespace boost::spirit;
  28. using namespace boost::spirit::ascii;
  29. using boost::phoenix::ref;
  30. ///////////////////////////////////////////////////////////////////////////////
  31. // Token definition
  32. ///////////////////////////////////////////////////////////////////////////////
// Token definitions for this example: a 'word' token plus a handful of
// single-character literal tokens, all registered with a lexertl-based lexer.
template <typename Lexer>
struct example2_tokens : lex::lexer<Lexer>
{
    example2_tokens()
    {
        // A 'word' is comprised of one or more letters and an optional
        // apostrophe. If it contains an apostrophe, there may only be one and
        // the apostrophe must be preceded and succeeded by at least 1 letter.
        // For example, "I'm" and "doesn't" meet the definition of 'word' we
        // define below.
        word = "[a-zA-Z]+('[a-zA-Z]+)?";

        // Associate the tokens and the token set with the lexer. Note that
        // single character token definitions as used below always get
        // interpreted literally and never as special regex characters. This is
        // done to be able to assign single characters the id of their character
        // code value, allowing to reference those as literals in Qi grammars.
        this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
    }

    // The 'word' token definition is public so the grammar below can use it
    // directly as a parser component (referenced as 'tok.word').
    lex::token_def<> word;
};
  53. ///////////////////////////////////////////////////////////////////////////////
  54. // Grammar definition
  55. ///////////////////////////////////////////////////////////////////////////////
// Grammar recognizing any number of simple English sentences (commands,
// questions and statements) grouped into newline-separated paragraphs, and
// counting each kind separately.
template <typename Iterator>
struct example2_grammar : qi::grammar<Iterator>
{
    // The constructor receives the token definitions so the grammar can use
    // the 'word' token (tok.word) as a parser component.
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : example2_grammar::base_type(story)
      , paragraphs(0), commands(0), questions(0), statements(0)
    {
        // A story is one or more paragraphs.
        story
            =  +paragraph
            ;

        // A paragraph is one or more sentences followed by optional trailing
        // spaces and at least one newline. Each recognized sentence increments
        // the matching counter through a Phoenix semantic action; a matched
        // paragraph increments the paragraph counter.
        paragraph
            =  (   +(   command [ ++ref(commands) ]
                    |   question [ ++ref(questions) ]
                    |   statement [ ++ref(statements) ]
                    )
                >> *char_(' ') >> +char_('\n')
               )
               [ ++ref(paragraphs) ]
            ;

        // The three sentence kinds share the same body (words, spaces and
        // commas) and differ only in their terminating punctuation — this is
        // what makes the grammar heavily backtracking.
        command
            =  +(tok.word | ' ' | ',') >> '!'
            ;

        question
            =  +(tok.word | ' ' | ',') >> '?'
            ;

        statement
            =  +(tok.word | ' ' | ',') >> '.'
            ;

        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    qi::rule<Iterator> story, paragraph, command, question, statement;

    // Counters updated by the semantic actions above. NOTE(review):
    // 'paragraphs' is counted but never printed by main() — presumably
    // intentional for this example.
    int paragraphs, commands, questions, statements;
};
  94. ///////////////////////////////////////////////////////////////////////////////
  95. int main()
  96. {
  97. // iterator type used to expose the underlying input stream
  98. typedef std::string::iterator base_iterator_type;
  99. // This is the token type to return from the lexer iterator
  100. typedef lex::lexertl::token<base_iterator_type> token_type;
  101. // This is the lexer type to use to tokenize the input.
  102. // Here we use the lexertl based lexer engine.
  103. typedef lex::lexertl::lexer<token_type> lexer_type;
  104. // This is the token definition type (derived from the given lexer type).
  105. typedef example2_tokens<lexer_type> example2_tokens;
  106. // this is the iterator type exposed by the lexer
  107. typedef example2_tokens::iterator_type iterator_type;
  108. // this is the type of the grammar to parse
  109. typedef example2_grammar<iterator_type> example2_grammar;
  110. // now we use the types defined above to create the lexer and grammar
  111. // object instances needed to invoke the parsing process
  112. example2_tokens tokens; // Our lexer
  113. example2_grammar calc(tokens); // Our parser
  114. std::string str (read_from_file("example2.input"));
  115. // At this point we generate the iterator pair used to expose the
  116. // tokenized input stream.
  117. std::string::iterator it = str.begin();
  118. iterator_type iter = tokens.begin(it, str.end());
  119. iterator_type end = tokens.end();
  120. // Parsing is done based on the token stream, not the character
  121. // stream read from the input.
  122. bool r = qi::parse(iter, end, calc);
  123. if (r && iter == end)
  124. {
  125. std::cout << "-------------------------\n";
  126. std::cout << "Parsing succeeded\n";
  127. std::cout << "There were "
  128. << calc.commands << " commands, "
  129. << calc.questions << " questions, and "
  130. << calc.statements << " statements.\n";
  131. std::cout << "-------------------------\n";
  132. }
  133. else
  134. {
  135. std::cout << "-------------------------\n";
  136. std::cout << "Parsing failed\n";
  137. std::cout << "-------------------------\n";
  138. }
  139. std::cout << "Bye... :-) \n\n";
  140. return 0;
  141. }