toy_spirit.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // toy_spirit.hpp
  3. //
  4. // Copyright 2008 Eric Niebler. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #include <cctype>
  8. #include <string>
  9. #include <cstring>
  10. #include <iostream>
  11. #include <boost/assert.hpp>
  12. #include <boost/mpl/assert.hpp>
  13. #include <boost/proto/core.hpp>
  14. #include <boost/proto/context.hpp>
  15. #include <boost/test/unit_test.hpp>
  16. namespace boost
  17. {
  18. // global tags
  19. struct char_tag {};
  20. struct ichar_tag {};
  21. struct istring_tag {};
  22. struct ichar_range_tag {};
  23. struct never_tag {};
  24. struct always_tag {};
  25. struct space_tag {};
  26. // global primitives
  27. proto::terminal<char_tag>::type const char_ = {{}};
  28. proto::terminal<space_tag>::type const space = {{}};
  29. using proto::lit;
  30. using proto::literal;
  31. }
  32. namespace boost { namespace spirit2
  33. {
  34. // handy typedefs
  35. typedef proto::terminal<char_tag>::type anychar_p;
  36. typedef proto::terminal<ichar_tag>::type ianychar_p;
  37. typedef proto::terminal<istring_tag>::type ianystr_p;
  38. typedef proto::terminal<ichar_range_tag>::type ianychar_range_p;
  39. typedef proto::terminal<never_tag>::type never_p;
  40. typedef proto::terminal<space_tag>::type space_p;
  41. struct SpiritGrammar;
  42. struct SkipperGrammar;
  43. struct SpiritPrimitives;
  44. template<typename Grammar>
  45. struct SpiritComposites;
  46. struct CharLiteral
  47. : proto::terminal<char>
  48. {};
  49. struct NTBSLiteral
  50. : proto::terminal<char const *>
  51. {};
  52. struct StdStringLiteral
  53. : proto::terminal<std::string>
  54. {};
  55. struct CharParser
  56. : proto::function<anychar_p, CharLiteral>
  57. {};
  58. struct ICharParser
  59. : proto::function<ianychar_p, CharLiteral, CharLiteral>
  60. {};
  61. struct CharRangeParser
  62. : proto::function<anychar_p, CharLiteral, CharLiteral>
  63. {};
  64. struct IStrParser
  65. : proto::function<ianystr_p, StdStringLiteral>
  66. {};
  67. struct ICharRangeParser
  68. : proto::function<ianychar_range_p, CharLiteral, CharLiteral>
  69. {};
  70. ianychar_p const ichar_ = {{}};
  71. ianystr_p const istr_ = {{}};
  72. ianychar_range_p const ichar_range_ = {{}};
  73. namespace utility
  74. {
  75. inline bool char_icmp(char ch, char lo, char hi)
  76. {
  77. return ch == lo || ch == hi;
  78. }
  79. template<typename FwdIter>
  80. inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end)
  81. {
  82. FwdIter tmp = begin;
  83. for(; *sz; ++tmp, ++sz)
  84. if(tmp == end || *tmp != *sz)
  85. return false;
  86. begin = tmp;
  87. return true;
  88. }
  89. template<typename FwdIter>
  90. inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end)
  91. {
  92. BOOST_ASSERT(0 == str.size() % 2);
  93. FwdIter tmp = begin;
  94. std::string::const_iterator istr = str.begin(), estr = str.end();
  95. for(; istr != estr; ++tmp, istr += 2)
  96. if(tmp == end || (*tmp != *istr && *tmp != *(istr+1)))
  97. return false;
  98. begin = tmp;
  99. return true;
  100. }
  101. inline bool in_range(char ch, char lo, char hi)
  102. {
  103. return ch >= lo && ch <= hi;
  104. }
  105. inline bool in_irange(char ch, char lo, char hi)
  106. {
  107. return in_range(ch, lo, hi)
  108. || in_range(std::tolower(ch), lo, hi)
  109. || in_range(std::toupper(ch), lo, hi);
  110. }
  111. inline std::string to_istr(char const *sz)
  112. {
  113. std::string res;
  114. res.reserve(std::strlen(sz) * 2);
  115. for(; *sz; ++sz)
  116. {
  117. res.push_back(std::tolower(*sz));
  118. res.push_back(std::toupper(*sz));
  119. }
  120. return res;
  121. }
  122. } // namespace utility
  123. template<typename FwdIter, typename Skipper = never_p>
  124. struct spirit_context
  125. : std::pair<FwdIter, FwdIter>
  126. , proto::callable_context<spirit_context<FwdIter, Skipper> >
  127. {
  128. typedef bool result_type;
  129. typedef FwdIter iterator;
  130. spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper())
  131. : std::pair<FwdIter, FwdIter>(first, second)
  132. , skip_(skip)
  133. , in_skip_(false)
  134. {}
  135. // parse function for anychar_p
  136. bool operator()(proto::tag::terminal, char_tag)
  137. {
  138. this->skip();
  139. if(this->first == this->second)
  140. return false;
  141. ++this->first;
  142. return true;
  143. }
  144. // parse function for char_('a')
  145. template<typename Expr>
  146. bool operator()(proto::tag::function, anychar_p, Expr const &expr)
  147. {
  148. this->skip();
  149. return proto::eval(expr, *this);
  150. }
  151. // parse function for space_p
  152. bool operator()(proto::tag::terminal, space_tag)
  153. {
  154. this->skip();
  155. if(this->first == this->second || !std::isspace(*this->first))
  156. return false;
  157. ++this->first;
  158. return true;
  159. }
  160. // parse function for bare character literals
  161. bool operator()(proto::tag::terminal, char ch)
  162. {
  163. this->skip();
  164. if(this->first == this->second || *this->first != ch)
  165. return false;
  166. ++this->first;
  167. return true;
  168. }
  169. // case-insensitive character parser
  170. template<typename Arg1, typename Arg2>
  171. bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2)
  172. {
  173. this->skip();
  174. if(this->first == this->second
  175. || !utility::char_icmp(*this->first, proto::value(arg1), proto::value(arg2)))
  176. return false;
  177. ++this->first;
  178. return true;
  179. }
  180. // parse function for NTBS literals
  181. bool operator()(proto::tag::terminal, char const *sz)
  182. {
  183. this->skip();
  184. return utility::string_cmp(sz, this->first, this->second);
  185. }
  186. // parse function for istr_("hello")
  187. template<typename Expr>
  188. bool operator()(proto::tag::function, ianystr_p, Expr const &expr)
  189. {
  190. this->skip();
  191. return utility::string_icmp(proto::value(expr), this->first, this->second);
  192. }
  193. // parse function for char_('a','z')
  194. template<typename Arg1, typename Arg2>
  195. bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2)
  196. {
  197. BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
  198. this->skip();
  199. if(this->first == this->second
  200. || !utility::in_range(*this->first, proto::value(arg1), proto::value(arg2)))
  201. return false;
  202. ++this->first;
  203. return true;
  204. }
  205. // parse function for ichar_range_('a','z')
  206. template<typename Arg1, typename Arg2>
  207. bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2)
  208. {
  209. BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
  210. this->skip();
  211. if(this->first == this->second
  212. || !utility::in_irange(*this->first, proto::value(arg1), proto::value(arg2)))
  213. return false;
  214. ++this->first;
  215. return true;
  216. }
  217. // parse function for complemented thingies (where thingies are assumed
  218. // to be 1 character wide).
  219. template<typename Expr>
  220. bool operator()(proto::tag::complement, Expr const &expr)
  221. {
  222. this->skip();
  223. iterator where = this->first;
  224. if(proto::eval(expr, *this))
  225. return this->first = where, false;
  226. this->first = ++where;
  227. return true;
  228. }
  229. // never_p parse function always returns false.
  230. bool operator()(proto::tag::terminal, never_tag)
  231. {
  232. return false;
  233. }
  234. // for A >> B, succeeds if A and B matches.
  235. template<typename Left, typename Right>
  236. bool operator()(proto::tag::shift_right, Left const &left, Right const &right)
  237. {
  238. return proto::eval(left, *this) && proto::eval(right, *this);
  239. }
  240. // for A | B, succeeds if either A or B matches at this point.
  241. template<typename Left, typename Right>
  242. bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right)
  243. {
  244. iterator where = this->first;
  245. return proto::eval(left, *this) || proto::eval(right, this->reset(where));
  246. }
  247. // for *A, greedily match A as many times as possible.
  248. template<typename Expr>
  249. bool operator()(proto::tag::dereference, Expr const &expr)
  250. {
  251. iterator where = this->first;
  252. while(proto::eval(expr, *this))
  253. where = this->first;
  254. // make sure that when we return true, the iterator is at the correct position!
  255. this->first = where;
  256. return true;
  257. }
  258. // for +A, greedily match A one or more times.
  259. template<typename Expr>
  260. bool operator()(proto::tag::unary_plus, Expr const &expr)
  261. {
  262. return proto::eval(expr, *this) && proto::eval(*expr, *this);
  263. }
  264. // for !A, optionally match A.
  265. template<typename Expr>
  266. bool operator()(proto::tag::logical_not, Expr const &expr)
  267. {
  268. iterator where = this->first;
  269. if(!proto::eval(expr, *this))
  270. this->first = where;
  271. return true;
  272. }
  273. // for (A - B), matches when A but not B matches.
  274. template<typename Left, typename Right>
  275. bool operator()(proto::tag::minus, Left const &left, Right const &right)
  276. {
  277. iterator where = this->first;
  278. return !proto::eval(right, *this) && proto::eval(left, this->reset(where));
  279. }
  280. private:
  281. spirit_context &reset(iterator where)
  282. {
  283. this->first = where;
  284. return *this;
  285. }
  286. void skip()
  287. {
  288. if(!this->in_skip_)
  289. {
  290. this->in_skip_ = true;
  291. while(proto::eval(this->skip_, *this))
  292. {}
  293. this->in_skip_ = false;
  294. }
  295. }
  296. Skipper skip_;
  297. bool in_skip_;
  298. };
  299. struct as_ichar_parser : proto::callable
  300. {
  301. typedef proto::function<
  302. ianychar_p
  303. , proto::terminal<char>::type
  304. , proto::terminal<char>::type
  305. >::type result_type;
  306. template<typename Expr>
  307. result_type operator()(Expr const &expr) const
  308. {
  309. char lo = std::tolower(proto::value(proto::child_c<1>(expr)));
  310. char hi = std::toupper(proto::value(proto::child_c<1>(expr)));
  311. result_type that = {ichar_, {lo}, {hi}};
  312. return that;
  313. }
  314. };
  315. struct as_ichar_range_parser : proto::callable
  316. {
  317. typedef proto::function<
  318. ianychar_range_p
  319. , proto::terminal<char>::type
  320. , proto::terminal<char>::type
  321. >::type result_type;
  322. template<typename Expr>
  323. result_type operator()(Expr const &expr) const
  324. {
  325. char lo = proto::value(proto::child_c<1>(expr));
  326. char hi = proto::value(proto::child_c<2>(expr));
  327. result_type that = {ichar_range_, {lo}, {hi}};
  328. return that;
  329. }
  330. };
  331. struct as_ichar_literal : proto::callable
  332. {
  333. typedef proto::function<
  334. ianychar_p
  335. , proto::terminal<char>::type
  336. , proto::terminal<char>::type
  337. >::type result_type;
  338. template<typename Expr>
  339. result_type operator()(Expr const &expr) const
  340. {
  341. char lo = std::tolower(proto::value(expr));
  342. char hi = std::toupper(proto::value(expr));
  343. result_type that = {ichar_, {lo}, {hi}};
  344. return that;
  345. }
  346. };
  347. struct as_intbs_literal : proto::callable
  348. {
  349. typedef proto::function<
  350. ianystr_p
  351. , proto::terminal<std::string>::type
  352. >::type result_type;
  353. template<typename Expr>
  354. result_type operator()(Expr const &expr) const
  355. {
  356. result_type that = {istr_, {utility::to_istr(proto::value(expr))}};
  357. return that;
  358. }
  359. };
  360. struct as_istdstring_literal : proto::callable
  361. {
  362. typedef proto::function<
  363. ianystr_p
  364. , proto::terminal<std::string>::type
  365. >::type result_type;
  366. template<typename Expr>
  367. result_type operator()(Expr const &expr) const
  368. {
  369. result_type that = {istr_, {utility::to_istr(proto::value(expr).c_str())}};
  370. return that;
  371. }
  372. };
  373. ///////////////////////////////////////////////////////////////////////////
  374. // Transforms
  375. ///////////////////////////////////////////////////////////////////////////
  376. struct skip_primitives : proto::transform<skip_primitives>
  377. {
  378. template<typename Expr, typename State, typename Data>
  379. struct impl : proto::transform_impl<Expr, State, Data>
  380. {
  381. typedef
  382. typename proto::shift_right<
  383. typename proto::dereference<State>::type
  384. , Expr
  385. >::type
  386. result_type;
  387. result_type operator ()(
  388. typename impl::expr_param expr
  389. , typename impl::state_param state
  390. , typename impl::data_param data
  391. ) const
  392. {
  393. result_type that = {{state}, expr};
  394. return that;
  395. }
  396. };
  397. };
  398. ///////////////////////////////////////////////////////////////////////////
  399. // Grammar
  400. ///////////////////////////////////////////////////////////////////////////
  401. using proto::_;
  402. struct SpiritGrammar;
  403. struct SpiritCaseSensitivePrimitives
  404. : proto::or_<
  405. proto::when<CharParser, as_ichar_parser(_)>
  406. , proto::when<CharLiteral, as_ichar_literal(_)>
  407. , proto::when<NTBSLiteral, as_intbs_literal(_)>
  408. , proto::when<CharRangeParser, as_ichar_range_parser(_)>
  409. , proto::when<StdStringLiteral, as_istdstring_literal(_)>
  410. >
  411. {};
  412. struct SpiritCaseInsensitivePrimitives
  413. : proto::or_<
  414. anychar_p
  415. , IStrParser
  416. , ICharParser
  417. , ICharRangeParser
  418. , proto::complement<SpiritPrimitives>
  419. >
  420. {};
  421. struct SpiritPrimitives
  422. : proto::or_<
  423. SpiritCaseSensitivePrimitives
  424. , SpiritCaseInsensitivePrimitives
  425. >
  426. {};
  427. template<typename Grammar>
  428. struct SpiritComposites
  429. : proto::or_<
  430. proto::bitwise_or< Grammar, Grammar >
  431. , proto::shift_right< Grammar, Grammar >
  432. , proto::minus< Grammar, Grammar >
  433. , proto::dereference< Grammar >
  434. , proto::unary_plus< Grammar >
  435. , proto::logical_not< Grammar >
  436. >
  437. {};
  438. // Regular Spirit grammar, has no-case transforms
  439. struct SpiritGrammar
  440. : proto::or_<
  441. SpiritComposites<SpiritGrammar>
  442. , SpiritPrimitives
  443. >
  444. {};
  445. // Spirit grammar with the skipper transform
  446. struct SkipperGrammar
  447. : proto::or_<
  448. SpiritComposites<SkipperGrammar>
  449. , proto::when<SpiritPrimitives, skip_primitives>
  450. >
  451. {};
  452. ///////////////////////////////////////////////////////////////////////////
  453. // Directives
  454. ///////////////////////////////////////////////////////////////////////////
  455. struct no_case_directive
  456. {
  457. template<typename Expr>
  458. typename boost::result_of<SpiritGrammar(Expr const &)>::type const
  459. operator [](Expr const &expr) const
  460. {
  461. return SpiritGrammar()(expr);
  462. }
  463. };
  464. // no_case
  465. no_case_directive const no_case = {};
  466. template<typename Skipper>
  467. struct skip_directive
  468. {
  469. skip_directive(Skipper const &skip)
  470. : skip_(skip)
  471. {}
  472. template<typename Expr>
  473. typename boost::result_of<SkipperGrammar(Expr const &, Skipper const &)>::type const
  474. operator [](Expr const &expr) const
  475. {
  476. return SkipperGrammar()(expr, this->skip_);
  477. }
  478. private:
  479. Skipper skip_;
  480. };
  481. // skip
  482. template<typename Skipper>
  483. skip_directive<Skipper> skip(Skipper const &skip)
  484. {
  485. return skip_directive<Skipper>(skip);
  486. }
  487. ///////////////////////////////////////////////////////////////////////////
  488. // parse
  489. ///////////////////////////////////////////////////////////////////////////
  490. template<typename FwdIter, typename Rule>
  491. bool parse(FwdIter begin, FwdIter end, Rule const &rule)
  492. {
  493. // make sure the rule corresponds to the Spirit grammar:
  494. BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
  495. spirit_context<FwdIter> ctx(begin, end);
  496. return proto::eval(rule, ctx);
  497. }
  498. // parse with a skip parser can be implemented in one of two ways:
  499. // Method 1)
  500. // The skip parser is passed to all the parsers which invoke it
  501. // before they invoke themselves. This is how Spirit-1 does it,
  502. // and it is the cause of the Scanner Business. However, it has
  503. // the advantage of not needing a parser transformation phase.
  504. // Method 2)
  505. // Transform the expression template to insert the skip parser
  506. // in between all sequenced parsers. That is, transform (A >> B)
  507. // to (*skip >> A >> *skip >> B). This has the advantage of making
  508. // it unnecessary to pass the scanner to all the parsers, which
  509. // means its type doesn't show up in function signatures, avoiding
  510. // the Scanner Business.
  511. // Recommendation:
  512. // Both methods should be supported. Method 1 should be preferred
  513. // when calling parse with parsers defined inline. Method 2 should
  514. // be preferred when a parser expression is assigned to a rule<>,
  515. // thereby making the type of the rule<> independent of the skip
  516. // parser used. I imagine a syntax like:
  517. // rule<> r = skip(space)[A >> B >> C]
  518. template<typename FwdIter, typename Rule, typename Skipper>
  519. bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper)
  520. {
  521. // make sure the rule corresponds to the Spirit grammar:
  522. BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
  523. //// Method 1: pass skip parser in the context structure.
  524. //spirit_context<FwdIter, Skipper> ctx(begin, end, skipper);
  525. //return proto::eval(rule, ctx);
  526. // Method 2: Embed skip parser via tree transformation.
  527. spirit_context<FwdIter> ctx(begin, end);
  528. return proto::eval(spirit2::skip(skipper)[rule], ctx);
  529. }
  530. }}
  531. using namespace boost;
  532. using namespace spirit2;
  533. void test_toy_spirit()
  534. {
  535. std::string str("abcd");
  536. // This will fail:
  537. BOOST_CHECK(!spirit2::parse(str.begin(), str.end()
  538. , char_ >> char_('a')));
  539. // This will succeed:
  540. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  541. , char_ >> char_('b') >> char_ >> 'd'));
  542. // This will succeed:
  543. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  544. , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd'));
  545. // This will succeed:
  546. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  547. , *(char_ - 'd')));
  548. // This will succeed:
  549. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  550. , no_case[char_('A') >> 'B' >> "CD"]));
  551. // This will succeed:
  552. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  553. , no_case[*char_('A','Z')]));
  554. literal<char> a = lit('a');
  555. literal<char const *> bcd = lit("bcd");
  556. // This will succeed:
  557. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  558. , +~~a >> no_case[bcd]));
  559. // Scanner Business: R.I.P. :-)
  560. str = "a b cd";
  561. BOOST_CHECK(spirit2::parse(str.begin(), str.end()
  562. , char_('a') >> 'b' >> 'c' >> 'd', space >> space));
  563. }
  564. using namespace boost::unit_test;
  565. ///////////////////////////////////////////////////////////////////////////////
  566. // init_unit_test_suite
  567. //
  568. test_suite* init_unit_test_suite( int argc, char* argv[] )
  569. {
  570. test_suite *test = BOOST_TEST_SUITE("test proto and and toy spirit-2");
  571. test->add(BOOST_TEST_CASE(&test_toy_spirit));
  572. return test;
  573. }