/////////////////////////////////////////////////////////////////////////////// // toy_spirit.hpp // // Copyright 2008 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include namespace boost { // global tags struct char_tag {}; struct ichar_tag {}; struct istring_tag {}; struct ichar_range_tag {}; struct never_tag {}; struct always_tag {}; struct space_tag {}; // global primitives proto::terminal::type const char_ = {{}}; proto::terminal::type const space = {{}}; using proto::lit; using proto::literal; } namespace boost { namespace spirit2 { // handy typedefs typedef proto::terminal::type anychar_p; typedef proto::terminal::type ianychar_p; typedef proto::terminal::type ianystr_p; typedef proto::terminal::type ianychar_range_p; typedef proto::terminal::type never_p; typedef proto::terminal::type space_p; struct SpiritGrammar; struct SkipperGrammar; struct SpiritPrimitives; template struct SpiritComposites; struct CharLiteral : proto::terminal {}; struct NTBSLiteral : proto::terminal {}; struct StdStringLiteral : proto::terminal {}; struct CharParser : proto::function {}; struct ICharParser : proto::function {}; struct CharRangeParser : proto::function {}; struct IStrParser : proto::function {}; struct ICharRangeParser : proto::function {}; ianychar_p const ichar_ = {{}}; ianystr_p const istr_ = {{}}; ianychar_range_p const ichar_range_ = {{}}; namespace utility { inline bool char_icmp(char ch, char lo, char hi) { return ch == lo || ch == hi; } template inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end) { FwdIter tmp = begin; for(; *sz; ++tmp, ++sz) if(tmp == end || *tmp != *sz) return false; begin = tmp; return true; } template inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end) { BOOST_ASSERT(0 == str.size() % 2); FwdIter tmp = begin; std::string::const_iterator istr = str.begin(), estr = str.end(); for(; istr != estr; ++tmp, istr += 2) if(tmp == end || (*tmp != *istr && *tmp != *(istr+1))) return false; begin = tmp; return true; } inline bool in_range(char ch, char lo, char hi) { return ch >= lo && ch <= hi; } inline bool in_irange(char ch, char lo, char hi) { return in_range(ch, lo, hi) || in_range(std::tolower(ch), lo, hi) || in_range(std::toupper(ch), lo, hi); } inline std::string to_istr(char const *sz) { std::string res; res.reserve(std::strlen(sz) * 2); for(; *sz; ++sz) { res.push_back(std::tolower(*sz)); res.push_back(std::toupper(*sz)); } return res; } } // namespace utility template struct spirit_context : std::pair , proto::callable_context > { typedef bool result_type; typedef FwdIter iterator; spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper()) : std::pair(first, second) , skip_(skip) , in_skip_(false) {} // parse function for anychar_p bool operator()(proto::tag::terminal, char_tag) { this->skip(); if(this->first == this->second) return false; ++this->first; return true; } // parse function for char_('a') template bool operator()(proto::tag::function, anychar_p, Expr const &expr) { this->skip(); return proto::eval(expr, *this); } // parse function for space_p bool operator()(proto::tag::terminal, space_tag) { this->skip(); if(this->first == this->second || !std::isspace(*this->first)) return false; ++this->first; return true; } // parse function for bare character literals bool operator()(proto::tag::terminal, char ch) { this->skip(); if(this->first == this->second || *this->first != ch) return false; ++this->first; return true; } // case-insensitive character parser template bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2) { this->skip(); if(this->first == this->second || !utility::char_icmp(*this->first, proto::value(arg1), proto::value(arg2))) return false; ++this->first; return true; } // parse function for NTBS literals bool operator()(proto::tag::terminal, char const *sz) { this->skip(); return utility::string_cmp(sz, this->first, this->second); } // parse function for istr_("hello") template bool operator()(proto::tag::function, ianystr_p, Expr const &expr) { this->skip(); return utility::string_icmp(proto::value(expr), this->first, this->second); } // parse function for char_('a','z') template bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2) { BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2)); this->skip(); if(this->first == this->second || !utility::in_range(*this->first, proto::value(arg1), proto::value(arg2))) return false; ++this->first; return true; } // parse function for ichar_range_('a','z') template bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2) { BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2)); this->skip(); if(this->first == this->second || !utility::in_irange(*this->first, proto::value(arg1), proto::value(arg2))) return false; ++this->first; return true; } // parse function for complemented thingies (where thingies are assumed // to be 1 character wide). template bool operator()(proto::tag::complement, Expr const &expr) { this->skip(); iterator where = this->first; if(proto::eval(expr, *this)) return this->first = where, false; this->first = ++where; return true; } // never_p parse function always returns false. bool operator()(proto::tag::terminal, never_tag) { return false; } // for A >> B, succeeds if A and B matches. template bool operator()(proto::tag::shift_right, Left const &left, Right const &right) { return proto::eval(left, *this) && proto::eval(right, *this); } // for A | B, succeeds if either A or B matches at this point. template bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right) { iterator where = this->first; return proto::eval(left, *this) || proto::eval(right, this->reset(where)); } // for *A, greedily match A as many times as possible. template bool operator()(proto::tag::dereference, Expr const &expr) { iterator where = this->first; while(proto::eval(expr, *this)) where = this->first; // make sure that when we return true, the iterator is at the correct position! this->first = where; return true; } // for +A, greedily match A one or more times. template bool operator()(proto::tag::unary_plus, Expr const &expr) { return proto::eval(expr, *this) && proto::eval(*expr, *this); } // for !A, optionally match A. template bool operator()(proto::tag::logical_not, Expr const &expr) { iterator where = this->first; if(!proto::eval(expr, *this)) this->first = where; return true; } // for (A - B), matches when A but not B matches. template bool operator()(proto::tag::minus, Left const &left, Right const &right) { iterator where = this->first; return !proto::eval(right, *this) && proto::eval(left, this->reset(where)); } private: spirit_context &reset(iterator where) { this->first = where; return *this; } void skip() { if(!this->in_skip_) { this->in_skip_ = true; while(proto::eval(this->skip_, *this)) {} this->in_skip_ = false; } } Skipper skip_; bool in_skip_; }; struct as_ichar_parser : proto::callable { typedef proto::function< ianychar_p , proto::terminal::type , proto::terminal::type >::type result_type; template result_type operator()(Expr const &expr) const { char lo = std::tolower(proto::value(proto::child_c<1>(expr))); char hi = std::toupper(proto::value(proto::child_c<1>(expr))); result_type that = {ichar_, {lo}, {hi}}; return that; } }; struct as_ichar_range_parser : proto::callable { typedef proto::function< ianychar_range_p , proto::terminal::type , proto::terminal::type >::type result_type; template result_type operator()(Expr const &expr) const { char lo = proto::value(proto::child_c<1>(expr)); char hi = proto::value(proto::child_c<2>(expr)); result_type that = {ichar_range_, {lo}, {hi}}; return that; } }; struct as_ichar_literal : proto::callable { typedef proto::function< ianychar_p , proto::terminal::type , proto::terminal::type >::type result_type; template result_type operator()(Expr const &expr) const { char lo = std::tolower(proto::value(expr)); char hi = std::toupper(proto::value(expr)); result_type that = {ichar_, {lo}, {hi}}; return that; } }; struct as_intbs_literal : proto::callable { typedef proto::function< ianystr_p , proto::terminal::type >::type result_type; template result_type operator()(Expr const &expr) const { result_type that = {istr_, {utility::to_istr(proto::value(expr))}}; return that; } }; struct as_istdstring_literal : proto::callable { typedef proto::function< ianystr_p , proto::terminal::type >::type result_type; template result_type operator()(Expr const &expr) const { result_type that = {istr_, {utility::to_istr(proto::value(expr).c_str())}}; return that; } }; /////////////////////////////////////////////////////////////////////////// // Transforms /////////////////////////////////////////////////////////////////////////// struct skip_primitives : proto::transform { template struct impl : proto::transform_impl { typedef typename proto::shift_right< typename proto::dereference::type , Expr >::type result_type; result_type operator ()( typename impl::expr_param expr , typename impl::state_param state , typename impl::data_param data ) const { result_type that = {{state}, expr}; return that; } }; }; /////////////////////////////////////////////////////////////////////////// // Grammar /////////////////////////////////////////////////////////////////////////// using proto::_; struct SpiritGrammar; struct SpiritCaseSensitivePrimitives : proto::or_< proto::when , proto::when , proto::when , proto::when , proto::when > {}; struct SpiritCaseInsensitivePrimitives : proto::or_< anychar_p , IStrParser , ICharParser , ICharRangeParser , proto::complement > {}; struct SpiritPrimitives : proto::or_< SpiritCaseSensitivePrimitives , SpiritCaseInsensitivePrimitives > {}; template struct SpiritComposites : proto::or_< proto::bitwise_or< Grammar, Grammar > , proto::shift_right< Grammar, Grammar > , proto::minus< Grammar, Grammar > , proto::dereference< Grammar > , proto::unary_plus< Grammar > , proto::logical_not< Grammar > > {}; // Regular Spirit grammar, has no-case transforms struct SpiritGrammar : proto::or_< SpiritComposites , SpiritPrimitives > {}; // Spirit grammar with the skipper transform struct SkipperGrammar : proto::or_< SpiritComposites , proto::when > {}; /////////////////////////////////////////////////////////////////////////// // Directives /////////////////////////////////////////////////////////////////////////// struct no_case_directive { template typename boost::result_of::type const operator [](Expr const &expr) const { return SpiritGrammar()(expr); } }; // no_case no_case_directive const no_case = {}; template struct skip_directive { skip_directive(Skipper const &skip) : skip_(skip) {} template typename boost::result_of::type const operator [](Expr const &expr) const { return SkipperGrammar()(expr, this->skip_); } private: Skipper skip_; }; // skip template skip_directive skip(Skipper const &skip) { return skip_directive(skip); } /////////////////////////////////////////////////////////////////////////// // parse /////////////////////////////////////////////////////////////////////////// template bool parse(FwdIter begin, FwdIter end, Rule const &rule) { // make sure the rule corresponds to the Spirit grammar: BOOST_MPL_ASSERT((proto::matches)); spirit_context ctx(begin, end); return proto::eval(rule, ctx); } // parse with a skip parser can be implemented in one of two ways: // Method 1) // The skip parser is passed to all the parsers which invoke it // before they invoke themselves. This is how Spirit-1 does it, // and it is the cause of the Scanner Business. However, it has // the advantage of not needing a parser transformation phase. // Method 2) // Transform the expression template to insert the skip parser // in between all sequenced parsers. That is, transform (A >> B) // to (*skip >> A >> *skip >> B). This has the advantage of making // it unnecessary to pass the scanner to all the parsers, which // means its type doesn't show up in function signatures, avoiding // the Scanner Business. // Recommendation: // Both methods should be supported. Method 1 should be preferred // when calling parse with parsers defined inline. Method 2 should // be preferred when a parser expression is assigned to a rule<>, // thereby making the type of the rule<> independent of the skip // parser used. I imagine a syntax like: // rule<> r = skip(space)[A >> B >> C] template bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper) { // make sure the rule corresponds to the Spirit grammar: BOOST_MPL_ASSERT((proto::matches)); //// Method 1: pass skip parser in the context structure. //spirit_context ctx(begin, end, skipper); //return proto::eval(rule, ctx); // Method 2: Embed skip parser via tree transformation. spirit_context ctx(begin, end); return proto::eval(spirit2::skip(skipper)[rule], ctx); } }} using namespace boost; using namespace spirit2; void test_toy_spirit() { std::string str("abcd"); // This will fail: BOOST_CHECK(!spirit2::parse(str.begin(), str.end() , char_ >> char_('a'))); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , char_ >> char_('b') >> char_ >> 'd')); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd')); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , *(char_ - 'd'))); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , no_case[char_('A') >> 'B' >> "CD"])); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , no_case[*char_('A','Z')])); literal a = lit('a'); literal bcd = lit("bcd"); // This will succeed: BOOST_CHECK(spirit2::parse(str.begin(), str.end() , +~~a >> no_case[bcd])); // Scanner Business: R.I.P. :-) str = "a b cd"; BOOST_CHECK(spirit2::parse(str.begin(), str.end() , char_('a') >> 'b' >> 'c' >> 'd', space >> space)); } using namespace boost::unit_test; /////////////////////////////////////////////////////////////////////////////// // init_unit_test_suite // test_suite* init_unit_test_suite( int argc, char* argv[] ) { test_suite *test = BOOST_TEST_SUITE("test proto and and toy spirit-2"); test->add(BOOST_TEST_CASE(&test_toy_spirit)); return test; }