12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205 |
- [#getting_started]
- [section Getting started with Boost.Metaparse]
- [section 1. Introduction]
- This tutorial shows you how to build a parser for a small calculator language
- from the ground up. The goal is not to have a complete calculator, but to show
- you the most common situations one can face while building a parser using
- Metaparse. This tutorial assumes that you have some template metaprogramming
- experience.
- [section 1.1. Testing environment]
- While you are using Metaparse, you will be writing parsers turning an input text
- into a type. These types can later be processed by further template
- metaprograms. While you are working on your parsers, you'll probably want to
- look at the result of parsing a test input. This tutorial assumes that you can
- use [@http://metashell.org Metashell]. Since the
- [@http://metashell.org/about/demo online demo] makes the Boost
- headers available, you can use that in the tutorial as well.
- If you install Metashell on your computer, make sure that you have the Boost
- libraries and the `getting_started` example of Metaparse on the include path.
- For example, you can start Metashell with the following arguments:
- $ metashell -I$BOOST_ROOT -I$BOOST_ROOT/libs/metaparse/example/getting_started
- `$BOOST_ROOT` refers to the ['boost root directory] (where you have checked
- out the Boost source code).
- This tutorial is long and therefore you might want to make shorter or longer
- breaks while reading it. To make it easy for you to stop at a certain point and
- continue later (or to start in the middle if you are already familiar with the
- basics) Metaparse has a `getting_started` directory in the `example`s. This
- contains the definitions for each section of this tutorial.
- If you're about to start (or continue) this guide from section 5.2.1, you can
- include `5_2_1.hpp`. This will define everything you need to start with that
- section.
- [note
- You have access to these headers in the online Metashell demo as well. For
- example you can include the `<boost/metaparse/getting_started/5_2_1.hpp>`
- header to start from section 5.2.1.
- ]
- [endsect]
- [section 1.2. Using a "regular" testing environment]
- If you have no access to Metashell or you prefer using your regular C++
- development environment while processing this tutorial, this is also possible.
- The tutorial (and usually experimenting with Metaparse) requires that you
- evaluate different template metaprogramming expressions and check their result,
- which is a type. Thus, to try the examples of this tutorial you need a way to
- be able to display the result of evaluating a template metaprogram. This section
- shows you two options.
- [section 1.2.1. Enforcing an error message or a warning containing the result of
- the metafunction call]
- You can either use `boost::mpl::print` or `mpllibs::metamonad::fail_with_type`
- to enforce a warning or an error message containing the result of a metaprogram
- evaluation. For example to see what
- [link BOOST_METAPARSE_STRING `BOOST_METAPARSE_STRING`]`("11 + 2")` refers to,
- you can create a `test.cpp` with the following content:
- #include <boost/metaparse/string.hpp>
- #include <boost/mpl/print.hpp>
- boost::mpl::print<BOOST_METAPARSE_STRING("11 + 2")> x;
- If you try to compile it, the compiler will display warnings containing the
- type the expression
- [link BOOST_METAPARSE_STRING `BOOST_METAPARSE_STRING`]`("11 + 2")` constructs.
- To use this technique for this tutorial, you need to add all the includes and
- definitions the tutorial suggests typing in the shell to your `test.cpp` file.
- When the tutorial suggests calling some metafunction (or you'd like to try
- something out), you need to replace the template argument of `boost::mpl::print`
- with the expression in question and recompile the code.
- [endsect]
- [section 1.2.2. Displaying the result of the metafunction call at runtime]
- You can also display the result of metaprograms at runtime. You can use the
- [@http://boost.org/libs/type_index Boost.TypeIndex] library to do this. For
- example to see what
- [link BOOST_METAPARSE_STRING `BOOST_METAPARSE_STRING`]`("11 + 2")` refers to,
- you can create a `test.cpp` with the following content:
- #include <boost/metaparse/string.hpp>
- #include <boost/type_index.hpp>
- #include <iostream>
- int main()
- {
- std::cout
- << boost::typeindex::type_id_with_cvr<BOOST_METAPARSE_STRING("11 + 2")>()
- << std::endl;
- }
- If you compile and run this code, it will display the type on the standard
- output.
- [endsect]
- [endsect]
- [endsect]
- [section 2. The text to parse]
- With Metaparse you can create template metaprograms parsing an input text. To
- pass the input text to the metaprograms, you need to represent them as types.
- For example let's represent the text `"Hello world"` as a type. The most
- straightforward way of doing it would be creating a variadic template class
- taking the characters of the text as template arguments:
- template <char... Cs>
- struct string;
- The text `"11 + 2"` can be represented the following way:
- string<'1', '1', ' ', '+', ' ', '2'>
- Metaparse provides this type for you. Run the following command in Metashell:
- > #include <boost/metaparse/string.hpp>
- [note
- Note that the `>` character at the beginning of the above code example is the
- prompt of Metashell. It is added to the code examples as a hint to what you
- should run in Metashell (or add to your test `cpp` file if you are using a
- regular development environment).
- ]
- [note
- Note that in the [@http://abel.web.elte.hu/shell/metashell.html online-demo]
- of Metashell you can paste code into the shell by right-clicking on the shell
- somewhere and choosing ['Paste from browser] in the context menu.
- ]
- This will make this type available for you. Now you can try running the
- following command:
- > boost::metaparse::string<'1', '1', ' ', '+', ' ', '2'>
- The shell will echo (almost) the same type back to you. The only difference is
- that it is in a sub-namespace indicating the version of Metaparse being used.
- The nice thing about this representation is that metaprograms can easily access
- the individual characters of the text. The not so nice thing about this
- representation is that if you want to write the text `"Hello world"` in your
- source code, you have to type a lot.
- Metaparse provides a macro that can turn a string literal into an instance of
- [link string `boost::metaparse::string`]. This is the
- [link BOOST_METAPARSE_STRING `BOOST_METAPARSE_STRING`] macro. You get it by
- including `<boost/metaparse/string.hpp>`. Let's try it by running the following
- command in Metashell:
- > BOOST_METAPARSE_STRING("11 + 2")
- You will get the same result as you got by instantiating
- [link string `boost::metaparse::string`] yourself.
- [endsect]
- [section 3. Creating a simple parser]
- [note Note that you can find everything that has been included and defined so far [link before_3 here].]
- Let's try creating a parser. We will start with creating a parser for something
- simple: we will be parsing integer numbers, such as the text `"13"`. You can
- think of this first parsing exercise as a ['template metaprogramming
- string-to-int conversion] because we expect to get the value `13` as the result
- of parsing.
- [note
- You know the difference between `"13"` and `13` in C++. One of them is a
- character array, the other one is an integral value. But what is the
- difference between them in template metaprogramming? They are represented by
- different types. For example `"13"` is represented by
- [link string `string`]`<'1', '3'>` while `13` is represented by
- `std::integral_constant<int, 13>`.
- ]
- To build a parser, we need to specify the grammar to use. Metaparse provides
- building blocks (called parsers) we can use to do this and one of them is the
- [link int_ `int_`] parser which does exactly what we need: it parses integers.
- To make it available, we need to include it:
- > #include <boost/metaparse/int_.hpp>
- Our grammar is simple: [link int_ `int_`]. (Don't worry, we'll parse more
- complicated languages later).
- A parser is a [link metafunction_class template metafunction class]. It can be
- used directly, but its interface is designed for completeness and not for ease
- of use. Metaparse provides the [link build_parser `build_parser`]
- [link metafunction metafunction] that adds a wrapper to parsers with a simple
- interface.
- [note
- In this tutorial, we will always be wrapping our parsers with this. We will
- call these wrapped parsers parsers as well. If you are interested in it, you
- can learn about the complete interface of parsers [link parser here].
- ]
- Let's create a parser using [link int_ `int_`] and
- [link build_parser `build_parser`]:
- > #include <boost/metaparse/build_parser.hpp>
- > using namespace boost::metaparse;
- > using exp_parser1 = build_parser<int_>;
- [link getting_started_0 copy-paste friendly version]
- First we need to include `build_parser.hpp` to make
- [link build_parser `build_parser`] available. Then we make our lives easier by
- running `using namespace boost::metaparse;`. The third command defines the
- parser: we need to instantiate the [link build_parser `build_parser`] template
- class with our parser ([link int_ `int_`] in this case) as argument.
- Now that we have a parser, let's parse some text with it (if you haven't done it
- yet, include `boost/metaparse/string.hpp`):
- > exp_parser1::apply<BOOST_METAPARSE_STRING("13")>::type
- mpl_::integral_c<int, 13>
- `exp_parser1` is a [link metafunction_class template metafunction class] taking
- the input text as its argument and it returns the integral representation of
- the number in the string. Try it with different numbers and see how it converts
- them.
- [section 3.1. Dealing with invalid input]
- [note Note that you can find everything that has been included and defined so far [link before_3_1 here].]
- Have you tried parsing an invalid input? Something that is not a number, such
- as:
- > exp_parser1::apply<BOOST_METAPARSE_STRING("thirteen")>::type
- << compilation error >>
- Well, `"thirteen"` ['is] a number, but our parser does not speak English, so it
- is considered as invalid input. As a result of this, compilation fails and you
- get a compilation error from Metashell.
- In the [@#dealing-with-invalid-input-1 Dealing with invalid input] section we
- will go into further details on error handling.
- [endsect]
- [section 3.2. Dealing with input containing more than what is needed]
- [note Note that you can find everything that has been included and defined so far [link before_3_2 here].]
- Let's try to give the parser two numbers instead of one:
- > exp_parser1::apply<BOOST_METAPARSE_STRING("11 13")>::type
- mpl_::integral_c<int, 11>
- You might be surprised by this: the parser did not return an error. It parsed
- the first number, `11` and ignored `13`. The way [link int_ `int_`] works is
- that it parses the number at the beginning of the input text and ignores the
- rest of the input.
- So `exp_parser1` has a bug: our little language consists of ['one] number, not a
- ['list of numbers]. Let's fix our parser to treat more than one number as
- invalid input:
- > #include <boost/metaparse/entire_input.hpp>
- This gives us the [link entire_input `entire_input`] template class. We can
- wrap [link int_ `int_`] with [link entire_input `entire_input`] indicating
- that the number we parse with [link int_ `int_`] should be the entire input.
- Anything that comes after that is an error. So our parser is
- [link entire_input `entire_input`]`<`[link int_ `int_`]`>` now. Let's wrap it
- with [link build_parser `build_parser`]:
- > using exp_parser2 = build_parser<entire_input<int_>>;
- Let's try this new parser out:
- > exp_parser2::apply<BOOST_METAPARSE_STRING("13")>::type
- mpl_::integral_c<int, 13>
- It can still parse numbers. Let's try to give it two numbers:
- > exp_parser2::apply<BOOST_METAPARSE_STRING("11 13")>::type
- << compilation error >>
- This generates a compilation error, since the parser failed.
- [endsect]
- [section 3.3. Accepting optional whitespaces at the end of the input]
- [note Note that you can find everything that has been included and defined so far [link before_3_3 here].]
- Our parser became a bit too
- restrictive now. It doesn't allow ['anything] after the number, not even
- whitespaces:
- > exp_parser2::apply<BOOST_METAPARSE_STRING("11 ")>::type
- << compilation error >>
- Let's allow whitespaces after the number:
- > #include <boost/metaparse/token.hpp>
- This makes the [link token `token`] template class available. It takes a parser
- as its argument and allows optional whitespaces after that. Let's create a third
- parser allowing whitespaces after the number:
- > using exp_parser3 = build_parser<entire_input<token<int_>>>;
- We expect [link token `token`]`<`[link int_ `int_`]`>` to be the entire input
- in this case. We allow optional whitespaces after [link int_ `int_`] but
- nothing else:
- > exp_parser3::apply<BOOST_METAPARSE_STRING("11 ")>::type
- mpl_::integral_c<int, 11>
- [endsect]
- [endsect]
- [section 4. Parsing simple expressions]
- [note Note that you can find everything that has been included and defined so far [link before_4 here].]
- We can parse numbers. Let's try parsing something more complicated, such as
- `"11 + 2"`. This is a number followed by a `+` symbol followed by another
- number. [link int_ `int_`] (or [link token `token`]`<`[link int_ `int_`]`>`)
- implements the parser for one number.
- First, let's write a parser for the `+` symbol. We can use the following:
- > #include <boost/metaparse/lit_c.hpp>
- This gives us [link lit_c `lit_c`] which we can use to parse specific
- characters, such as `+`. The grammar parsing the `+` character can be
- represented by [link lit_c `lit_c`]`<'+'>`. To allow optional whitespaces after
- it, we should use [link token `token`]`<`[link lit_c `lit_c`]`<'+'>>`.
- So to parse `"11 + 2"` we need the following sequence of parsers:
- token<int_> token<lit_c<'+'>> token<int_>
- Metaparse provides [link sequence `sequence`] for parsing the sequence of
- things:
- > #include <boost/metaparse/sequence.hpp>
- We can implement the parser for our expressions using
- [link sequence `sequence`]:
- sequence<token<int_>, token<lit_c<'+'>>, token<int_>>
- Let's create a parser using it:
- > using exp_parser4 = build_parser<sequence<token<int_>, token<lit_c<'+'>>, token<int_>>>;
- Try parsing a simple expression using it:
- > exp_parser4::apply<BOOST_METAPARSE_STRING("11 + 2")>::type
- boost::mpl::v_item<mpl_::integral_c<int, 2>, boost::mpl::v_item<mpl_::char_<'+'>
- , boost::mpl::v_item<mpl_::integral_c<int, 11>, boost::mpl::vector0<mpl_::na>, 0
- >, 0>, 0>
- What you get might look strange to you. It is a `vector` from
- [@http://boost.org/libs/mpl Boost.MPL]. What you can see in the shell is the
- way this vector is represented. Metashell offers
- [@http://metashell.org/manual/getting_started#data-structures-of-boostmpl pretty printing]
- for [@http://boost.org/libs/mpl Boost.MPL] containers:
- > #include <metashell/formatter.hpp>
- After including this header, try parsing again:
- > exp_parser4::apply<BOOST_METAPARSE_STRING("11 + 2")>::type
- boost_::mpl::vector<mpl_::integral_c<int, 11>, mpl_::char_<'+'>, mpl_::integral_c<int, 2> >
- What you get now looks simpler: this is a vector of three elements:
- * `mpl_::integral_c<int, 11>` This is the result of parsing with
- [link token `token`]`<`[link int_ `int_`]`>`.
- * `mpl_::char_<'+'>` This is the result of parsing with
- [link token `token`]`<`[link lit_c `lit_c`]`<'+'>>`.
- * `mpl_::integral_c<int, 2>` This is the result of parsing with
- [link token `token`]`<`[link int_ `int_`]`>`.
- The result of parsing with a [link sequence `sequence`] is the `vector` of the
- individual parsing results.
- [section 4.1. Tokenizer]
- [note Note that you can find everything that has been included and defined so far [link before_4_1 here].]
- You might have noticed that our parsers have no separate tokenizers.
- Tokenization is part of the parsing process. However, it makes the code of the
- parsers cleaner if we separate the two layers. The previous example has two
- types of tokens:
- * a number (eg. `13`)
- * a `+` symbol
- In our last solution we parsed them by using the
- [link token `token`]`<`[link int_ `int_`]`>` and
- [link token `token`]`<`[link lit_c `lit_c`]`<'+'>>` parsers. Have you noticed
- a pattern? We wrap the parsers of the tokens with [link token `token`]`<...>`.
- It is not just syntactic sugar. Our tokens might be followed (separated) by
- whitespaces, which can be ignored. That is what [link token `token`]`<...>`
- implements.
- So let's make the implementation of `exp_parser` cleaner by separating the
- tokenization from the rest of the parser:
- > using int_token = token<int_>;
- > using plus_token = token<lit_c<'+'>>;
- [link getting_started_1 copy-paste friendly version]
- These two definitions create type aliases for the parsers of our tokens. For the
- compiler it doesn't matter if we use `plus_token` or
- [link token `token`]`<`[link lit_c `lit_c`]`<'+'>>`, since they refer to the
- same type. But it makes the code of the parser easier to understand.
- We can now define our expression parser using these tokens:
- > using exp_parser5 = build_parser<sequence<int_token, plus_token, int_token>>;
- We can use it the same way as `exp_parser4`:
- > exp_parser5::apply<BOOST_METAPARSE_STRING("11 + 2")>::type
- boost_::mpl::vector<mpl_::integral_c<int, 11>, mpl_::char_<'+'>, mpl_::integral_c<int, 2> >
- [endsect]
- [section 4.2. Evaluating the expression]
- [note Note that you can find everything that has been included and defined so far [link before_4_2 here].]
- It would be nice if we could evaluate the expression as well. Instead of
- returning a `vector` as the result of parsing, we should return the evaluated
- expression. For example the result of parsing `"11 + 2"` should be
- `mpl_::integral_c<int, 13>`.
- Metaparse provides [link transform `transform`] which we can use to implement
- this:
- > #include <boost/metaparse/transform.hpp>
- This can be used to transform the result of a parser. For example we have the
- [link sequence `sequence`]`<int_token, plus_token, int_token>` parser which
- returns a `vector`. We want to transform this `vector` into a number, which is
- the result of evaluating the expression. We need to pass
- [link transform `transform`] the [link sequence `sequence`]`<...>` parser and
- a function which turns the `vector` into the result we need. First let's create
- this [link metafunction metafunction]:
- > #include <boost/mpl/plus.hpp>
- > #include <boost/mpl/at.hpp>
- > template <class Vector> \
- ...> struct eval_plus : \
- ...> boost::mpl::plus< \
- ...> typename boost::mpl::at_c<Vector, 0>::type, \
- ...> typename boost::mpl::at_c<Vector, 2>::type \
- ...> > {};
- [link getting_started_2 copy-paste friendly version]
- [note
- Note that if the last character of your command is the `\` character in
- Metashell, then the shell assumes that you will continue typing the same command
- and waits for that before evaluating your command. When Metashell is waiting for
- the second (or third, or fourth, etc) line of a command, it uses a special
- prompt, `...>`.
- ]
- What it does is that using `boost::mpl::at_c` it takes the first (index 0) and
- the third (index 2) elements of the `vector` that is the result of parsing with
- [link sequence `sequence`]`<...>` and adds them. We can try it out with an
- example `vector`:
- > eval_plus< \
- ...> boost::mpl::vector< \
- ...> mpl_::integral_c<int, 11>, \
- ...> mpl_::char_<'+'>, \
- ...> mpl_::integral_c<int, 2> \
- ...> >>::type
- mpl_::integral_c<int, 13>
- [link getting_started_3 copy-paste friendly version]
- We can use `eval_plus` to build a parser that evaluates the expression it
- parses:
- > #include <boost/mpl/quote.hpp>
- > using exp_parser6 = \
- ...> build_parser< \
- ...> transform< \
- ...> sequence<int_token, plus_token, int_token>, \
- ...> boost::mpl::quote1<eval_plus> \
- ...> > \
- ...> >;
- [link getting_started_4 copy-paste friendly version]
- [note
- Note that we have to use `boost::mpl::quote1` to turn our `eval_plus`
- [link metafunction metafunction] into a
- [link metafunction_class metafunction class].
- ]
- [link transform `transform`] parses the input using
- [link sequence `sequence`]`<int_token, plus_token, int_token>` and transforms
- the result of that using `eval_plus`. Let's try it out:
- > exp_parser6::apply<BOOST_METAPARSE_STRING("11 + 2")>::type
- mpl_::integral_c<int, 13>
- We have created a simple expression parser. The following diagram shows how it
- works:
- [$images/metaparse/tutorial_diag0.png [width 50%]]
- The rounded boxes in the diagram are the parsers parsing the input, which are
- functions ([link metafunction_class template metafunction class]es). The arrows
- represent how the results are passed around between these parsers (they are the
- return values of the function calls).
- It uses [link sequence `sequence`] to parse the different elements (the first
- number, the `+` symbol and the second number) and builds a `vector`. The final
- result is calculated from that `vector` by the [link transform `transform`]
- parser.
- [endsect]
- [endsect]
- [section 5. Parsing longer expressions]
- [note Note that you can find everything that has been included and defined so far [link before_5 here].]
- We can parse simple expressions adding two numbers together. But we can't parse
- expressions adding three, four or maybe more numbers together. In this section
- we will implement a parser for expressions adding lots of numbers together.
- [section 5.1. Parsing a subexpression repeatedly]
- [note Note that you can find everything that has been included and defined so far [link before_5_1 here].]
- We can't solve this problem with [link sequence `sequence`], since we don't
- know how many numbers the input will have. We need a parser that:
- * parses the first number
- * keeps parsing `+ <number>` elements until the end of the input
- Parsing the first number is something we can already do: the `int_token` parser
- does it for us. Parsing the `+ <number>` elements is more tricky. Metaparse
- offers different tools for approaching this. The simplest is
- [link repeated `repeated`]:
- > #include <boost/metaparse/repeated.hpp>
- [link repeated `repeated`] needs a parser (which parses one `+ <number>`
- element) and it keeps parsing the input with it as long as it can. This will
- parse the entire input for us. Let's create a parser for our expressions using
- it:
- > using exp_parser7 = \
- ...> build_parser< \
- ...> sequence< \
- ...> int_token, /* The first <number> */ \
- ...> repeated<sequence<plus_token, int_token>> /* The "+ <number>" elements */ \
- ...> > \
- ...> >;
- [link getting_started_5 copy-paste friendly version]
- We have a [link sequence `sequence`] with two elements:
- * The first number (`int_token`)
- * The `+ <number>` parts
- The second part is a [link repeated `repeated`], which parses the `+ <number>`
- elements. One such element is parsed by
- [link sequence `sequence`]`<plus_token, int_token>`. This is just a sequence of
- the `+` symbol and the number.
- Let's try parsing an expression using this:
- > exp_parser7::apply<BOOST_METAPARSE_STRING("1 + 2 + 3 + 4")>::type
- Here is a formatted version of the result which is easier to read:
- boost_::mpl::vector<
- // The result of int_token
- mpl_::integral_c<int, 1>,
-
- // The result of repeated< sequence<plus_token, int_token> >
- boost_::mpl::vector<
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 2> >,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 3> >,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 4> >
- >
- >
- The result is a `vector` of two elements. The first element of this `vector` is
- the result of parsing the input with `int_token`, the second element of this
- `vector` is the result of parsing the input with
- [link repeated `repeated`]`< `[link sequence `sequence`]`<plus_token, int_token>>`.
- This second element is also a `vector`. Each element of this `vector` is the
- result of parsing the input with
- [link sequence `sequence`]`<plus_token, int_token>` once. Here is a diagram
- showing how `exp_parser7` parses the input `1 + 2 + 3 + 4`:
- [$images/metaparse/tutorial_diag1.png [width 90%]]
- The diagram shows that the `+ <number>` elements are parsed by
- [link sequence `sequence`]`<plus_token, int_token>` elements and their results
- are collected by [link repeated `repeated`], which constructs a `vector` of
- these results. The value of the first `<number>` and this `vector` are placed in
- another `vector`, which is the result of parsing.
- [endsect]
- [section 5.2. Evaluating the parsed expression]
- [note Note that you can find everything that has been included and defined so far [link before_5_2 here].]
- The final result here is a pair of the first number and the `vector` of the rest
- of the values. To calculate the result we need to process that data structure.
- Let's give the example output we have just parsed a name. This will make it
- easier to test the code calculating the final result from this structure:
- > using temp_result = exp_parser7::apply<BOOST_METAPARSE_STRING("1 + 2 + 3 + 4")>::type;
- Now we can write a [link metafunction template metafunction] turning this
- structure into the result of the calculation this structure represents.
- [section 5.2.1. Learning about `boost::mpl::fold`]
- [note Note that you can find everything that has been included and defined so far [link before_5_2_1 here].]
- We have a `vector` containing
- another `vector`. Therefore, we will need to be able to summarise the elements
- of different `vector`s. We can use the `boost::mpl::fold`
- [link metafunction metafunction] to do this:
- > #include <boost/mpl/fold.hpp>
- With this [link metafunction metafunction], we can iterate over a `vector` of
- parsed numbers and summarise them. We can provide it a
- [link metafunction metafunction] taking two arguments: the sum we have so far
- and the next element of the `vector`. This [link metafunction metafunction]
- will be called for every element of the `vector`.
- [note
- Note that this is very similar to the `std::accumulate` algorithm.
- [@http://boost.org/libs/mpl Boost.MPL] provides `boost::mpl::accumulate` as
- well, which is a synonym for `boost::mpl::fold`. This tutorial (and Metaparse)
- uses the name `fold`.
- ]
- Let's start with a simple case: a `vector` of numbers. For example let's
- summarise the elements of the following `vector`:
- > using vector_of_numbers = \
- ...> boost::mpl::vector< \
- ...> boost::mpl::int_<2>, \
- ...> boost::mpl::int_<5>, \
- ...> boost::mpl::int_<6> \
- ...> >;
- [link getting_started_6 copy-paste friendly version]
- We will write a [link metafunction template metafunction], `sum_vector` for
- summarising the elements of a `vector` of numbers:
- > template <class Vector> \
- ...> struct sum_vector : \
- ...> boost::mpl::fold< \
- ...> Vector, \
- ...> boost::mpl::int_<0>, \
- ...> boost::mpl::lambda< \
- ...> boost::mpl::plus<boost::mpl::_1, boost::mpl::_2> \
- ...> >::type \
- ...> > \
- ...> {};
- [link getting_started_7 copy-paste friendly version]
- This [link metafunction metafunction] takes the `vector` to summarise the
- elements of as its argument and uses `boost::mpl::fold` to calculate the sum.
- `boost::mpl::fold` takes three arguments:
- * The container to summarise. This is `Vector`.
- * The starting value for ['the sum we have so far]. Using `0` means that we want
- to start the sum from `0`.
- * The function to call in every iteration while looping over the container. We
- are using a
- [@http://www.boost.org/libs/mpl/doc/refmanual/lambda-expression.html lambda expression]
- in our example, which is the expression wrapped by `boost::mpl::lambda`. This
- expression adds its two arguments together using `boost::mpl::plus`. The
- lambda expression refers to its arguments by `boost::mpl::_1` and
- `boost::mpl::_2`.
- Let's try this [link metafunction metafunction] out:
- > sum_vector<vector_of_numbers>::type
- mpl_::integral_c<int, 13>
- It works as expected. Here is a diagram showing how it works:
- [$images/metaparse/tutorial_diag2.png [width 50%]]
- As the diagram shows, `boost::mpl::fold` evaluates the lambda expression for
- each element of the `vector` and passes the result of the previous evaluation to
- the next lambda expression invocation.
- We have a [link metafunction metafunction] that can summarise a `vector` of
- numbers. The result of parsing the `+ <number>` elements is a `vector` of
- `vector`s. As a recap, here is `temp_result`:
- boost_::mpl::vector<
- // The result of int_token
- mpl_::integral_c<int, 1>,
-
- // The result of repeated< sequence<plus_token, int_token> >
- boost_::mpl::vector<
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 2> >,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 3> >,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 4> >
- >
- >
- First let's summarise the result of [link repeated `repeated`]`<...>` using
- `boost::mpl::fold`. This is a `vector` of `vector`s, but that's fine.
- `boost::mpl::fold` doesn't care about what the elements of the `vector` are.
- They can be numbers, `vector`s or something else as well. The function we use to
- add two numbers together (which was a lambda expression in our previous example)
- gets these elements as its argument and has to deal with them. So to summarise
- the elements of the `vector`s we get as the result of parsing with
- [link repeated `repeated`]`<...>`, we need to write a
- [link metafunction metafunction] that can deal with these elements. One such
- element is `boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 2>>`.
- Here is a [link metafunction metafunction] that can be used in a
- `boost::mpl::fold`:
- > template <class Sum, class Item> \
- ...> struct sum_items : \
- ...> boost::mpl::plus< \
- ...> Sum, \
- ...> typename boost::mpl::at_c<Item, 1>::type \
- ...> > \
- ...> {};
- [link getting_started_8 copy-paste friendly version]
- This function takes two arguments:
- * `Sum`, which is a number. This is the summary of the already processed
- elements.
- * `Item`, the next item of the `vector`. These items are `vector`s of size two:
- the result of parsing the `+` symbol and the number.
- The [link metafunction metafunction] adds the sum we have so far and the next
- number together using the `boost::mpl::plus` [link metafunction metafunction].
- To get the next number out of `Item`, it uses `boost::mpl::at_c`. Let's try
- `sum_items` out:
- > sum_items< \
- ...> mpl_::integral_c<int, 1>, \
- ...> boost::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 2>> \
- ...> >::type
- mpl_::integral_c<int, 3>
- [link getting_started_9 copy-paste friendly version]
- We have called `sum_items` with values from `temp_result` and saw that it works
- as expected: it added the partial sum (`mpl_::integral_c<int, 1>`) to the next
- number (`mpl_::integral_c<int, 2>`).
- `boost::mpl::fold` can summarise the list we get as the result of parsing the
- `+ <number>` elements of the input, so we need to extract this list from
- `temp_result` first:
- > boost::mpl::at_c<temp_result, 1>::type
- Here is the formatted version of the result:
- boost_::mpl::vector<
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 2>>,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 3>>,
- boost_::mpl::vector<mpl_::char_<'+'>, mpl_::integral_c<int, 4>>
- >
- This is the second element of the `temp_result` vector (the first one is the
- value of the first `<number>` element). Let's try fold out for this:
- > \
- ...> boost::mpl::fold< \
- ...> boost::mpl::at_c<temp_result, 1>::type, /* The vector to summarise */ \
- ...> boost::mpl::int_<0>, /* The value to start the sum from */ \
- ...> boost::mpl::quote2<sum_items> /* The function to call in each iteration */ \
- ...> >::type
- mpl_::integral_c<int, 9>
- [link getting_started_10 copy-paste friendly version]
- [note
- We are using `sum_items` as the function to call in each iteration. We are
- passing a [link metafunction metafunction] (`sum_items`) to another
- [link metafunction metafunction] (`boost::mpl::fold`) as an argument. To be
- able to do this, we need to turn it into a
- [link metafunction_class template metafunction class] using
- `boost::mpl::quote2` (`2` means that it takes two arguments).
- ]
- As we have seen, the result of this is the sum of the elements, which was `9` in
- our case. Here is a diagram showing how `boost::mpl::fold` works:
- [$images/metaparse/tutorial_diag3.png [width 50%]]
- It starts with the value `boost::mpl::int_<0>` and adds the elements of the
- `boost_::mpl::vector` containing the parsing results one by one. The diagram
- shows how the subresults are calculated and then used for further calculations.
- [endsect]
- [section 5.2.2. Evaluating the expression using `boost::mpl::fold`]
- [note Note that you can find everything that has been included and defined so far [link before_5_2_2 here].]
- Let's use `sum_items` with `boost::mpl::fold` to build the parser that
- summarises the values coming from the `+ <number>` elements. We can extend the
- parser we were using in `exp_parser7` by wrapping the
- [link repeated `repeated`]`<...>` part with [link transform `transform`], which
- transforms the result of [link repeated `repeated`]`<...>` with the folding
- expression we have just created:
- > using exp_parser8 = \
- ...> build_parser< \
- ...> sequence< \
- ...> int_token, /* parse the first <number> */ \
- ...> transform< \
- ...> repeated<sequence<plus_token, int_token>>, /* parse the "+ <number>" elements */ \
- ...> \
- ...> /* lambda expression summarising the "+ <number>" elements using fold */ \
- ...> boost::mpl::lambda< \
- ...> /* The folding expression we have just created */ \
- ...> boost::mpl::fold< \
- ...> boost::mpl::_1, /* the argument of the lambda expression, the result */ \
- ...> /* of the repeated<...> parser */ \
- ...> boost::mpl::int_<0>, \
- ...> boost::mpl::quote2<sum_items> \
- ...> > \
- ...> >::type \
- ...> > \
- ...> > \
- ...> >;
- [link getting_started_11 copy-paste friendly version]
- It uses [link transform `transform`] to turn the result of the previous version
- of our parser into one that summarises the `+ <number>` elements. Let's try it
- out:
- > exp_parser8::apply<BOOST_METAPARSE_STRING("1 + 2 + 3 + 4")>::type
- boost_::mpl::vector<mpl_::integral_c<int, 1>, mpl_::integral_c<int, 9> >
- This returns a pair of numbers as the result of parsing: the first number and
- the sum of the rest. To get the value of the entire expression we need to add
- these two numbers together. We can extend our parser to do this final addition
- as well:
- > using exp_parser9 = \
- ...> build_parser< \
- ...> transform< \
- ...> /* What we had so far */ \
- ...> sequence< \
- ...> int_token, \
- ...> transform< \
- ...> repeated<sequence<plus_token, int_token>>, \
- ...> boost::mpl::lambda< \
- ...> boost::mpl::fold< \
- ...> boost::mpl::_1, \
- ...> boost::mpl::int_<0>, \
- ...> boost::mpl::quote2<sum_items> \
- ...> > \
- ...> >::type \
- ...> > \
- ...> >, \
- ...> boost::mpl::quote1<sum_vector> /* summarise the vector of numbers */ \
- ...> > \
- ...> >;
- [link getting_started_12 copy-paste friendly version]
- `exp_parser9` wraps the parser we had so far (which gives us the two element
- `vector` as the result) with [link transform `transform`] to add the elements
- of that two element `vector` together. Since that two element `vector` is a
- `vector` of numbers, we can (re)use the `sum_vector`
- [link metafunction metafunction] for this. Let's try it out:
- > exp_parser9::apply<BOOST_METAPARSE_STRING("1 + 2 + 3 + 4")>::type
- mpl_::integral_c<int, 10>
- It gives us the correct result, but it is very inefficient. Let's see why:
- [$images/metaparse/tutorial_diag4.png [width 90%]]
- There are two loops in this process:
- * first [link repeated `repeated`] loops over the input to parse all of the
- `+ <number>` elements. It builds a `vector` during this. (`Loop 1` on the
- diagram)
- * then `boost::mpl::fold` loops over this `vector` to summarise the elements.
- (`Loop 2` on the diagram)
- [note
- Note that we have been talking about ['loop]s while there is no such thing as
- a loop in template metaprogramming. Loops can be implemented using
- ['recursion]: every recursive call is one iteration of the loop. The loop is
- stopped at the bottom of the recursive chain.
- ]
- [endsect]
- [section 5.2.3. Using a folding parser combinator]
- [note Note that you can find everything that has been included and defined so far [link before_5_2_3 here].]
- It would be nice, if the two loops could be merged together and the temporary
- `vector` wouldn't have to be built in the middle (don't forget: there is no
- such thing as a ['garbage collector] for template metaprogramming. Once you
- instantiate a template, it will be available until the end of ... the
- compilation).
- Metaparse provides the [link foldl `foldl`] parser combinator:
- > #include <boost/metaparse/foldl.hpp>
- It is almost the same as `boost::mpl::fold`, but instead of taking the `vector`
- as its first argument, which was coming from the repeated application of a
- parser ([link sequence `sequence`]`<plus_token, int_token>`) on the input, it
- takes the parser itself. [link foldl `foldl`] parses the input and calculates
- the summary on the fly. Here is how we can write our parser using it:
- > using exp_parser10 = \
- ...> build_parser< \
- ...> transform< \
- ...> sequence< \
- ...> int_token, \
- ...> foldl< \
- ...> sequence<plus_token, int_token>, \
- ...> boost::mpl::int_<0>, \
- ...> boost::mpl::quote2<sum_items> \
- ...> > \
- ...> >, \
- ...> boost::mpl::quote1<sum_vector>> \
- ...> >;
- [link getting_started_13 copy-paste friendly version]
- Here are the formatted versions of `exp_parser9` and `exp_parser10`
- side-by-side:
- // exp_parser9 exp_parser10
-
- build_parser< build_parser<
- transform< transform<
- sequence< sequence<
- int_token, int_token,
-
-
- transform< foldl<
- repeated<sequence<plus_token, int_token>>, sequence<plus_token, int_token>,
- boost::mpl::lambda<
- boost::mpl::fold<
- boost::mpl::_1,
- boost::mpl::int_<0>, boost::mpl::int_<0>,
- boost::mpl::quote2<sum_items> boost::mpl::quote2<sum_items>
- >
- >::type
- > >
-
-
- >, >,
- boost::mpl::quote1<sum_vector> boost::mpl::quote1<sum_vector>
- > >
- > >
- [link getting_started_14 copy-paste friendly version]
- In `exp_parser10` the "_[link repeated `repeated`] and then
- [link transform `transform`] with `boost::mpl::fold`_" part (the middle block of
- `exp_parser9`) has been replaced by one [link foldl `foldl`] parser that does
- the same thing but without building a `vector` in the middle. The same starting
- value (`boost::mpl::int_<0>`) and callback function (`sum_items`) could be used.
- Here is a diagram showing how `exp_parser10` works:
- [$images/metaparse/tutorial_diag5.png [width 90%]]
- In this case, the results of the
- [link sequence `sequence`]`<plus_token, int_token>` parsers are passed directly
- to a folding algorithm without an intermediate `vector`. Here is a diagram
- showing `exp_parser9` and `exp_parser10` side-by-side to make it easier to see
- the difference:
- [$images/metaparse/tutorial_diag6.png [width 90%]]
- [endsect]
- [section 5.2.4. Processing the initial element with the folding parser combinator]
- [note Note that you can find everything that has been included and defined so far [link before_5_2_4 here].]
- This solution can still be improved. The [link foldl `foldl`] summarising the
- `+ <number>` elements starts from `0` and once this is done, we add the value of
- the first `<number>` of the input to it in the first iteration. It would be more
- straightforward if [link foldl `foldl`] could use the value of the first
- `<number>` as the initial value of the "['sum we have so far]". Metaparse
- provides [link foldl_start_with_parser `foldl_start_with_parser`] for this:
- > #include <boost/metaparse/foldl_start_with_parser.hpp>
- [link foldl_start_with_parser `foldl_start_with_parser`] is almost the same as
- [link foldl `foldl`]. The difference is that instead of taking a starting
- ['value] for the sum it takes a ['parser]. First it parses the input with this
- parser and uses the value it returns as the starting value. Here is how we can
- implement our parser using it:
- > using exp_parser11 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<plus_token, int_token>, /* apply this parser repeatedly */ \
- ...> int_token, /* use this parser to get the initial value */ \
- ...> boost::mpl::quote2<sum_items> /* use this function to add a new value to the summary */ \
- ...> > \
- ...> >;
- [link getting_started_15 copy-paste friendly version]
- This version of `exp_parser` uses
- [link foldl_start_with_parser `foldl_start_with_parser`]. This implementation is
- more compact than the earlier versions. There is no [link sequence `sequence`]
- element in this: the first `<number>` is parsed by `int_token` and its value is
- used as the initial value for the summary. Let's try it out:
- > exp_parser11::apply<BOOST_METAPARSE_STRING("1 + 2 + 3 + 4")>::type
- mpl_::integral_c<int, 10>
- It returns the same result as the earlier version but works differently. Here is
- a diagram showing how this implementation works:
- [$images/metaparse/tutorial_diag7.png [width 90%]]
- [endsect]
- [endsect]
- [endsect]
- [section 6. Adding support for other operators]
- [note Note that you can find everything that has been included and defined so far [link before_6 here].]
- Our parsers now support expressions adding numbers together. In this section we
- will add support for the `-` operator, so expressions like `1 + 2 - 3` can be
- evaluated.
- [section 6.1. Parsing expressions containing `-` operators]
- [note Note that you can find everything that has been included and defined so far [link before_6_1 here].]
- Currently we use the `plus_token` for parsing "the" operator, which has to be
- `+`. We can define a new token for parsing the `-` symbol:
- > using minus_token = token<lit_c<'-'>>;
- We need to build a parser that accepts either a `+` or a `-` symbol. This can be
- implemented using [link one_of `one_of`]:
- > #include <boost/metaparse/one_of.hpp>
- [link one_of `one_of`]`<plus_token, minus_token>` is a parser which accepts
- either a `+` (using `plus_token`) or a `-` (using `minus_token`) symbol. The
- result of parsing is the result of the parser that succeeded.
- [note
- You can give any parser to [link one_of `one_of`], therefore it is possible
- that more than one of them can parse the input. In those cases the order
- matters: [link one_of `one_of`] tries parsing the input with the parsers from
- left to right and the first one that succeeds, wins.
- ]
- Using this, we can make our parser accept subtractions as well:
- > using exp_parser12 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, int_token>, \
- ...> int_token, \
- ...> boost::mpl::quote2<sum_items> \
- ...> > \
- ...> >;
- [link getting_started_16 copy-paste friendly version]
- It uses [link one_of `one_of`]`<plus_token, minus_token>` as the separator for
- the numbers. Let's try it out:
- > exp_parser12::apply<BOOST_METAPARSE_STRING("1 + 2 - 3")>::type
- mpl_::integral_c<int, 6>
- The result is not correct. The reason for this is that `sum_items`, the function
- we summarise with, ignores which operator was used and assumes that it is always
- `+`.
- [endsect]
- [section 6.2. Evaluating expressions containing `-` operators]
- [note Note that you can find everything that has been included and defined so far [link before_6_2 here].]
- To fix the evaluation of expressions containing subtractions, we need to fix
- the function we use for summarising. We need to write a version that takes the
- operator being used into account.
- First of all we will need the `boost::mpl::minus`
- [link metafunction metafunction] for implementing subtraction:
- > #include <boost/mpl/minus.hpp>
- Let's write a helper metafunction that takes three arguments: the left operand,
- the operator and the right operand:
- > template <class L, char Op, class R> struct eval_binary_op;
- > template <class L, class R> struct eval_binary_op<L, '+', R> : boost::mpl::plus<L, R>::type {};
- > template <class L, class R> struct eval_binary_op<L, '-', R> : boost::mpl::minus<L, R>::type {};
- [link getting_started_17 copy-paste friendly version]
- The first command declares the `eval_binary_op` metafunction. The first and
- third arguments are the left and right operands and the second argument is the
- operator.
- [note
- Note that it does not satisfy the expectations of a
- [link metafunction template metafunction] since it takes the operator as a
- `char` and not as a `class` (or `typename`) argument. For simplicity, we will
- still call it a metafunction.
- ]
- The second and third commands define the operation for the cases when the
- operator is `+` and `-`. When the `eval_binary_op` metafunction is called,
- the C++ compiler chooses one of the definitions based on the operator. If you
- have functional programming experience this approach (pattern matching) might be
- familiar to you. Let's try `eval_binary_op` out:
- > eval_binary_op<boost::mpl::int_<11>, '+', boost::mpl::int_<2>>::type
- mpl_::integral_c<int, 13>
- > eval_binary_op<boost::mpl::int_<13>, '-', boost::mpl::int_<2>>::type
- mpl_::integral_c<int, 11>
- [link getting_started_18 copy-paste friendly version]
- You might also try to use it with an operator it does not expect (yet). For
- example `'*'`. You will see the C++ compiler complaining that the
- requested version of the `eval_binary_op` template has not been defined. This
- solution can be extended and support for the `'*'` operator can always be added
- later.
- Let's write the [link metafunction metafunction] we can use from the folding
- parser to evaluate the expressions using `+` and `-` operators. This takes two
- arguments:
- * The partial result we have evaluated so far. (This used to be the summary we
- have evaluated so far, but we are making it a more general evaluation now).
- This is the left operand, a number.
- * The result of parsing `(+|-) <number>`. This is a `vector` containing two
- elements: a character representing the operator (`+` or `-`) and the value of
- the `<number>`. The number is the right operand.
- Let's write the [link metafunction metafunction] `binary_op` that takes these
- arguments and calls `eval_binary_op`:
- > template <class S, class Item> \
- ...> struct binary_op : \
- ...> eval_binary_op< \
- ...> S, \
- ...> boost::mpl::at_c<Item, 0>::type::value, \
- ...> typename boost::mpl::at_c<Item, 1>::type \
- ...> > \
- ...> {};
- [link getting_started_19 copy-paste friendly version]
- This [link metafunction metafunction] takes the operator (the first element)
- and the right operand (the second element) from `Item`. The operator is a class
- representing a character, such as `mpl_::char_<'+'>`. To get the character value
- out of it, one has to access its `::value`. For example `mpl_::char_<'+'>::value`
- is `'+'`. Since `eval_binary_op` takes this character value as its second
- argument, we had to pass `boost::mpl::at_c<Item, 0>::type::value` to it. Let's
- try it out:
- > binary_op<boost::mpl::int_<11>, boost::mpl::vector<boost::mpl::char_<'+'>, boost::mpl::int_<2>>>::type
- mpl_::integral_c<int, 13>
- We passed it a number (`11`) and a `vector` of a character (`+`) and another
- number (`2`). It added the two numbers as expected. Let's use this function as
- the third argument of [link foldl_start_with_parser `foldl_start_with_parser`]:
- > using exp_parser13 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, int_token>, \
- ...> int_token, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_20 copy-paste friendly version]
- It uses `binary_op` instead of `sum_items`. Let's try it out:
- > exp_parser13::apply<BOOST_METAPARSE_STRING("1 + 2 - 3")>::type
- mpl_::integral_c<int, 0>
- It returns the correct result.
- [endsect]
- [endsect]
- [section 7. Dealing with precedence]
- [note Note that you can find everything that has been included and defined so far [link before_7 here].]
- We support addition and subtraction. Let's support multiplication as well.
- [section 7.1. Adding support for the `*` operator]
- [note Note that you can find everything that has been included and defined so far [link before_7_1 here].]
- We can extend the solution we have built for addition and subtraction. To do
- that, we need to add support for multiplication to `eval_binary_op`:
- > #include <boost/mpl/times.hpp>
- > template <class L, class R> struct eval_binary_op<L, '*', R> : boost::mpl::times<L, R>::type {};
- [link getting_started_21 copy-paste friendly version]
- We had to include `<boost/mpl/times.hpp>` to get the `boost::mpl::times`
- [link metafunction metafunction] and then we could extend `eval_binary_op` to
- support the `*` operator as well. We can try it out:
- > eval_binary_op<boost::mpl::int_<3>, '*', boost::mpl::int_<4>>::type
- mpl_::integral_c<int, 12>
- This works as expected. Let's create a token for parsing the `*` symbol:
- > using times_token = token<lit_c<'*'>>;
- Now we can extend our parser to accept the `*` symbol as an operator:
- > using exp_parser14 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token, times_token>, int_token>, \
- ...> int_token, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_22 copy-paste friendly version]
- This version accepts either a `+`, a `-` or a `*` symbol as the operator. Let's
- try this out:
- > exp_parser14::apply<BOOST_METAPARSE_STRING("2 * 3")>::type
- mpl_::integral_c<int, 6>
- This works as expected. Let's try another, slightly more complicated expression:
- > exp_parser14::apply<BOOST_METAPARSE_STRING("1 + 2 * 3")>::type
- mpl_::integral_c<int, 9>
- This returns a wrong result. The value of this expression should be `7`, not
- `9`. The problem with this is that our current implementation does not take
- operator precedence into account. It treats this expression as `(1 + 2) * 3`
- while we expect it to be `1 + (2 * 3)` since multiplication has higher precedence
- than addition.
- [endsect]
- [section 7.2. Adding support for precedence of operators]
- [note Note that you can find everything that has been included and defined so far [link before_7_2 here].]
- Let's make it possible for different operators to have different precedence. To
- do this, we define a new parser for parsing expressions containing only `*`
- operators (that is the operator with the highest precedence):
- > using mult_exp1 = foldl_start_with_parser<sequence<times_token, int_token>, int_token, boost::mpl::quote2<binary_op>>;
- `mult_exp1` can parse expressions containing only `*` operator. For example
- `3 * 2` or `6 * 7 * 8`. Now we can create a parser supporting only the `+` and
- `-` operators but instead of separating ['numbers] with these operators we will
- separate ['expressions containing only `*` operators]. This means that the
- expression `1 * 2 + 3 * 4` is interpreted as the expressions `1 * 2` and `3 * 4`
- separated by a `+` operator. A number (eg. `13`) is the special case of an
- ['expression containing only `*` operators].
- Here is the parser implementing this:
- > using exp_parser15 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp1>, \
- ...> mult_exp1, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_23 copy-paste friendly version]
- Note that this is almost the same as `exp_parser13`. The only difference is that
- it uses `mult_exp1` everywhere, where `exp_parser13` was using `int_token`.
- Let's try it out:
- > exp_parser15::apply<BOOST_METAPARSE_STRING("1 + 2 * 3")>::type
- mpl_::integral_c<int, 7>
- This takes the precedence rules into account. The following diagram shows how it
- works:
- [$images/metaparse/tutorial_diag8.png [width 80%]]
- Subexpressions using `*` operators only are evaluated (by `mult_exp1`) and
- treated as single units while interpreting expressions using `+` and `-`
- operators. Numbers not surrounded by `*` operators are also treated as
- expressions using `*` only (containing no operations but a number).
- If you need more layers (eg. introducing the `^` operator) you can extend this
- solution with further layers. The order of the layers determines the precedence
- of the operators.
- [endsect]
- [endsect]
- [section 8. Dealing with associativity]
- [note Note that you can find everything that has been included and defined so far [link before_8 here].]
- Let's add division to our calculator language. Since it has the same precedence
- as multiplication, it should be added to that layer:
- > #include <boost/mpl/divides.hpp>
- > template <class L, class R> struct eval_binary_op<L, '/', R> : boost::mpl::divides<L, R>::type {};
- > using divides_token = token<lit_c<'/'>>;
- > using mult_exp2 = \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<times_token, divides_token>, int_token>, \
- ...> int_token, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- > using exp_parser16 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp2>, \
- ...> mult_exp2, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_24 copy-paste friendly version]
- We have to include `<boost/mpl/divides.hpp>` to get a
- [link metafunction metafunction] for doing a division. We need to extend the
- `eval_binary_op` [link metafunction metafunction] to support division as well.
- We had to introduce a new token, `divides_token` that can parse the `/` symbol.
- We have extended `mult_exp1` to accept either a `times_token` or a
- `divides_token` as the operator. This extended parser is called `mult_exp2`.
- We have written a new parser, `exp_parser16` which is the same as `exp_parser15`
- but uses `mult_exp2` instead of `mult_exp1`. This can parse expressions using
- division as well (and this new operator has the right precedence). Let's try it
- out:
- > exp_parser16::apply<BOOST_METAPARSE_STRING("8 / 4")>::type
- mpl_::integral_c<int, 2>
- This works as expected. But what should be the value of `8 / 4 / 2`? The answer
- can be either `1` or `4` depending on the associativity of the division
- operator. If it is left associative, then this expression is interpreted as
- `(8 / 4) / 2` and the result is `1`. If it is right associative, this
- expression is interpreted as `8 / (4 / 2)` and the result is `4`.
- Try to guess which result our current implementation gives before trying it
- out. Once you have verified the current behaviour, continue reading.
- [section 8.1. Understanding the current implementation]
- [note Note that you can find everything that has been included and defined so far [link before_8_1 here].]
- Here is a diagram showing how our current parser processes the expression
- `8 / 4 / 2`:
- [$images/metaparse/tutorial_diag9.png [width 70%]]
- It takes the first number, `8`, divides it by the second one, `4` and then it
- divides the result by the third one, `2`. This means that in our current
- implementation, division is left associative: `8 / 4 / 2` means `(8 / 4) / 2`.
- Another thing to note is that the initial value is `8` and the list of values
- [link foldl `foldl`] iterates over is "`/ 4`", "`/ 2`".
- [endsect]
- [section 8.2. Folding in reverse order]
- [note Note that you can find everything that has been included and defined so far [link before_8_2 here].]
- [link foldl `foldl`] applies a parser repeatedly and iterates over the parsing
- results from ['left] to right. (This is where the `l` in the name comes from).
- Metaparse provides another folding parser combinator, [link foldr `foldr`]. It
- applies a parser on the input as well but it iterates from ['right] to left over
- the results.
- Similarly to [link foldl_start_with_parser `foldl_start_with_parser`], Metaparse
- provides [link foldr_start_with_parser `foldr_start_with_parser`] as well. A
- major difference between the two
- ([link foldl_start_with_parser `foldl_start_with_parser`] and
- [link foldr_start_with_parser `foldr_start_with_parser`]) solutions is that
- while [link foldl_start_with_parser `foldl_start_with_parser`] treats the
- ['first] number as a special one,
- [link foldr_start_with_parser `foldr_start_with_parser`] treats the ['last]
- number as a special one. This might sound strange, but think about it: if you
- want to summarise the elements from right to left, your starting value should be
- the last element, not the first one, as the first one is the one you visit last.
- Due to the above difference
- [link foldr_start_with_parser `foldr_start_with_parser`] is not a drop-in
- replacement of [link foldl_start_with_parser `foldl_start_with_parser`]. While
- the list of values [link foldl `foldl`] was iterating over is "`8`", "`/ 4`",
- "`/ 2`", the list of values [link foldr `foldr`] has to iterate over is "`2`",
- "`4 /`", "`8 /`".
- This means that the function we use to ['"add"] a new value to the already
- evaluated part of the expression (this has been `binary_op` so far) has to be
- prepared for taking the next operator and operand in a reverse order (eg. by
- taking "`4 /`" instead of "`/ 4`"). We write another
- [link metafunction metafunction] for this purpose:
- > template <class S, class Item> \
- ...> struct reverse_binary_op : \
- ...> eval_binary_op< \
- ...> typename boost::mpl::at_c<Item, 0>::type, \
- ...> boost::mpl::at_c<Item, 1>::type::value, \
- ...> S \
- ...> > \
- ...> {};
- [link getting_started_25 copy-paste friendly version]
- There are multiple differences between `binary_op` and `reverse_binary_op`:
- * The `Item` argument, which is a `vector` is expected to be
- `[operator, operand]` in `binary_op` and `[operand, operator]` in
- `reverse_binary_op`.
- * Both versions use `eval_binary_op` to evaluate the subexpression, but
- `binary_op` treats `S`, the value representing the already evaluated part of
- the expression as the left operand, while `reverse_binary_op` treats it as the
- right operand. This is because in the first case we are going from left to
- right while in the second case we are going from right to left.
- We need to include [link foldr_start_with_parser `foldr_start_with_parser`]:
- > #include <boost/metaparse/foldr_start_with_parser.hpp>
- We can rewrite `mult_exp` using
- [link foldr_start_with_parser `foldr_start_with_parser`]:
- > using mult_exp3 = \
- ...> foldr_start_with_parser< \
- ...> sequence<int_token, one_of<times_token, divides_token>>, /* The parser applied repeatedly */ \
- ...> int_token, /* The parser parsing the last number */ \
- ...> boost::mpl::quote2<reverse_binary_op> /* The function called for every result */ \
- ...> /* of applying the above parser */ \
- ...> >;
- [link getting_started_26 copy-paste friendly version]
- It is almost the same as `mult_exp2`, but ...
- * ... the parser applied repeatedly parses `<number> <operator>` elements
- instead of `<operator> <number>` elements (what `mult_exp2` did).
- * ... this version uses `reverse_binary_op` instead of `binary_op` as the
- function that is called for every result of applying the above parser.
- We can create a new version of `exp_parser` that uses `mult_exp3` instead of
- `mult_exp2`:
- > using exp_parser17 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp3>, \
- ...> mult_exp3, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_27 copy-paste friendly version]
- The only difference between `exp_parser17` and the previous version,
- `exp_parser16` is that it uses the updated version of `mult_exp`. Let's try this
- parser out:
- > exp_parser17::apply<BOOST_METAPARSE_STRING("8 / 4 / 2")>::type
- mpl_::integral_c<int, 4>
- This version of the parser gives ['the other] possible result. The one you get
- when division is right associative, which means that the above expression is
- evaluated as `8 / (4 / 2)`. Here is a diagram showing how the
- [link foldr_start_with_parser `foldr_start_with_parser`]-based solution works:
- [$images/metaparse/tutorial_diag10.png [width 70%]]
- To make it easier to compare the two solutions, here is a diagram showing the
- two approaches side-by-side:
- [$images/metaparse/tutorial_diag11.png [width 100%]]
- As we have seen, the associativity of the operators can be controlled by
- choosing between folding solutions. The folding solutions going from left to
- right implement left associativity, while the solutions going from right to left
- implement right associativity.
- [note
- Note that folding solutions going from left to right are implemented in a more
- efficient way than folding from right to left. Therefore when both solutions
- can be used you should prefer folding from left to right.
- ]
- [endsect]
- [endsect]
- [section 9. Dealing with unary operators]
- [note Note that you can find everything that has been included and defined so far [link before_9 here].]
- Our calculator language provides no direct support for negative numbers. To get
- a negative number, we need to do a subtraction. For example to get the number
- `-13` we need to evaluate the expression `0 - 13`.
- We will implement `-` as a unary operator. Therefore the expression `-13` won't
- be a ['negative number]. It will be the unary `-` operator applied on the number
- `13`.
- Since `-` is an operator, it might be used multiple times. So the expression
- `---13` is also valid and gives the same result as `-13`. This means that any
- number of `-` symbols are valid before a number.
- Our parser can be extended to support the unary `-` operator by adding a new
- layer to the list of precedence layers. This should have the highest precedence,
- which means that we should use this new layer where we have been using
- `int_token`. Let's write a new parser:
- > #include <boost/mpl/negate.hpp>
- > using unary_exp1 = \
- ...> foldr_start_with_parser< \
- ...> minus_token, \
- ...> int_token, \
- ...> boost::mpl::lambda<boost::mpl::negate<boost::mpl::_1>>::type \
- ...> >;
- [link getting_started_28 copy-paste friendly version]
- We had to include `<boost/mpl/negate.hpp>` to get a
- [link metafunction metafunction] we can negate a value with.
- `unary_exp1` is implemented with right to left folding: it starts from the
- number (parsed by `int_token`) and processes the `-` symbols one by one. The
- function to be called for each `-` symbol is a lambda expression that negates
- the number. So the number is negated for every `-` symbol.
- We can implement a new version of `mult_exp` and `exp_parser`. They are the same
- as `mult_exp2` and `exp_parser16`. The only difference is that they (directly
- only `mult_exp4`) use `unary_exp1` instead of `int_token`.
- > using mult_exp4 = \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<times_token, divides_token>, unary_exp1>, \
- ...> unary_exp1, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- > using exp_parser18 = \
- ...> build_parser< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp4>, \
- ...> mult_exp4, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > \
- ...> >;
- [link getting_started_29 copy-paste friendly version]
- Let's try these new parsers out:
- > exp_parser18::apply<BOOST_METAPARSE_STRING("---13")>::type
- mpl_::integral_c<int, -13>
- > exp_parser18::apply<BOOST_METAPARSE_STRING("13")>::type
- mpl_::integral_c<int, 13>
- [link getting_started_30 copy-paste friendly version]
- It can deal with negative numbers correctly.
- [endsect]
- [section 10. Dealing with parens]
- Our parsers already support the precedence of the different operators. Let's add
- support for parens as well, so users can override the precedence rules when they
- need to.
- We can add a new parser for parsing (and evaluating) expressions in parens.
- First we introduce tokens for parsing the `(` and `)` symbols:
- > using lparen_token = token<lit_c<'('>>;
- > using rparen_token = token<lit_c<')'>>;
- [link getting_started_31 copy-paste friendly version]
- A paren can contain an expression with any operators in it, so we add a parser
- for parsing (and evaluating) an expression containing operators of the lowest
- precedence:
- > using plus_exp1 = \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp4>, \
- ...> mult_exp4, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- [link getting_started_32 copy-paste friendly version]
- This was just a refactoring of our last parser for the calculator language. We
- can build the parser for our calculator language by using
- [link build_parser `build_parser`]`<plus_exp1>` now. Let's write a parser for a
- paren expression:
- > using paren_exp1 = sequence<lparen_token, plus_exp1, rparen_token>;
- This definition parses a left paren, then a complete expression followed by a
- right paren. The result of parsing a paren expression is a `vector` of three
- elements: the `(` character, the value of the expression and the `)` character.
- We only need the value of the expression, which is the middle element. We could
- wrap the whole thing with a [link transform `transform`] that gets the middle
- element and throws the rest away, but we don't need to. This is such a common
- pattern, that Metaparse provides [link middle_of `middle_of`] for this:
- > #include <boost/metaparse/middle_of.hpp>
- > using paren_exp2 = middle_of<lparen_token, plus_exp1, rparen_token>;
- [link getting_started_33 copy-paste friendly version]
- This implementation is almost the same as `paren_exp1`. The difference is that
- the result of parsing will be the value of the wrapped expression (the result of
- the `plus_exp1` parser).
- Let's define a parser for a primary expression which is either a number or an
- expression in parens:
- > using primary_exp1 = one_of<int_token, paren_exp2>;
- This parser accepts either a number using `int_token` or an expression in parens
- using `paren_exp2`.
- Everywhere, where one can write a number (parsed by `int_token`), one can write
- a complete expression in parens as well. Our current parser implementation
- parses `int_token`s in `unary_exp`, therefore we need to change that to use
- `primary_exp` instead of `int_token`.
- There is a problem here: this makes the definitions of our parsers ['recursive].
- Think about it:
- * `plus_exp` uses `mult_exp`
- * `mult_exp` uses `unary_exp`
- * `unary_exp` uses `primary_exp`
- * `primary_exp` uses `paren_exp`
- * `paren_exp` uses `plus_exp`
- [note
- Since we are versioning the different parser implementations in Metashell
- (`paren_exp1`, `paren_exp2`, etc) you might try to define these recursive
- parsers and it might seem to work for the first time. In that case, when you
- later try creating a parser as part of a library (save your Metashell
- environment to a file or re-implement the important/successful elements) you
- will face this issue.
- ]
- We have been using type aliases (`typedef` and `using`) for defining the
- parsers. We can do it as long as their definition is not recursive. We can not
- refer to a type alias until we have defined it and type aliases can not be
- forward declared, so we can't find a point in the recursive cycle where we could
- start defining things.
- A solution for this is making one of the parsers a new class instead of a type
- alias. Classes can be forward declared, therefore we can declare the class,
- implement the rest of the parsers as they can refer to that class and then
- define the class at the end.
- Let's make `plus_exp` a class. So as a first step, let's forward declare it:
- > struct plus_exp2;
- Now we can write the rest of the parsers and they can refer to `plus_exp2`:
- > using paren_exp3 = middle_of<lparen_token, plus_exp2, rparen_token>;
- > using primary_exp2 = one_of<int_token, paren_exp2>;
- > using unary_exp2 = \
- ...> foldr_start_with_parser< \
- ...> minus_token, \
- ...> primary_exp2, \
- ...> boost::mpl::lambda<boost::mpl::negate<boost::mpl::_1>>::type \
- ...> >;
- > using mult_exp5 = \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<times_token, divides_token>, unary_exp2>, \
- ...> unary_exp2, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- [link getting_started_34 copy-paste friendly version]
- There is nothing new in the definition of these parsers. They build up the
- hierarchy we have worked out in the earlier sections of this tutorial. The only
- element missing is `plus_exp2`:
- > struct plus_exp2 : \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp5>, \
- ...> mult_exp5, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > {};
- [link getting_started_35 copy-paste friendly version]
- This definition makes use of inheritance instead of type aliasing. Now we can
- write the parser for the calculator that supports parens as well:
- > using exp_parser19 = build_parser<plus_exp2>;
- Let's try this parser out:
- > exp_parser19::apply<BOOST_METAPARSE_STRING("(1 + 2) * 3")>::type
- mpl_::integral_c<int, 9>
- Our parser accepts and can deal with parens in the expressions.
- [endsect]
- [#dealing_with_invalid_input]
- [section 11. Dealing with invalid input]
- So far we have been focusing on parsing valid user input. However, users of our
- parsers will make mistakes and we should help them find the source of the
- problem. And we should make this process not too painful.
- The major difficulty in error reporting is that we have no direct way of showing
- error messages to the user. The parsers are template metaprograms. When they
- detect that the input is invalid, they can make the compilation fail and the
- compiler (running the metaprogram) display an error message. What we can do is
- make those error messages short and have them contain all information about the parsing
- error. We should make it easy to find this information in whatever the compiler
- displays.
- So let's try to parse some invalid expression and let's see what happens:
- > exp_parser19::apply<BOOST_METAPARSE_STRING("hello")>::type
- << compilation error >>
- You will get a lot (if you have seen error messages coming from template
- metaprograms you know: this is ['not] a lot.) of error messages. Take a closer
- look. It contains this:
- x__________________PARSING_FAILED__________________x<
- 1, 1,
- boost::metaparse::v1::error::literal_expected<'('>
- >
- You can see a formatted version above. There are no line breaks in the real
- output. This is relatively easy to spot (thanks to the `____________` part) and
- contains answers to the main questions one has when parsing fails:
- * ['where] is the error? It is column `1` in line `1` (inside
- [link BOOST_METAPARSE_STRING `BOOST_METAPARSE_STRING`]). This is the `1, 1`
- part.
- * ['what] is the problem? `literal_expected<'('>`. This is a bit misleading, as
- it contains only a part of the problem. An open paren is not the only
- acceptable token here, a number would also be fine. This misleading error
- message is ['our] fault: ['we] (the parser authors) need to make the parsing
- errors more descriptive.
- [section 11.1. Improving the error messages]
- So how can we improve the error messages? Let's look at what went wrong in the
- previous case:
- * The input was `hello`.
- * `plus_exp2` tried to parse it.
- * `plus_exp2` tried to parse it using `mult_exp5` (assuming that this is the
- initial `mult_exp` in the list of `+` / `-` separated `mult_exp`s).
- * so `mult_exp5` tried to parse it.
- * `mult_exp5` tried to parse it using `unary_exp2` (assuming that this is the
- initial `unary_exp` in the list of `*` / `/` separated `unary_exp`s).
- * so `unary_exp2` tried to parse it.
- * `unary_exp2` parsed all of the `-` symbols using `minus_token`. There were
- none of them (the input started with an `h` character).
- * so `unary_exp2` tried to parse it using `primary_exp2`.
- * `primary_exp2` is: [link one_of `one_of`]`<int_token, paren_exp2>`. It tried
- parsing the input with `int_token` (which failed) and then with `paren_exp2`
- (which failed as well). So [link one_of `one_of`] could not parse the input
- with any of the choices and therefore it failed as well. In such situations
- `one_of` checks which parser made the most progress (consumed the most
- characters of the input) before failing and assumes, that that is the parser
- the user intended to use, thus it returns the error message coming from that
- parser. In this example none of the parsers could make any progress, in which
- case `one_of` returns the error coming from the last parser in the list. This
- was `paren_exp2`, and it expects the expression to start with an open paren.
- This is where the error message came from. The rest of the layers did not
- change or improve this error message so this was the error message displayed
- to the user.
- We, the parser authors know: we expect a primary expression there. When
- [link one_of `one_of`] fails, it means that none was found.
- [endsect]
- [section 11.2. Defining custom errors]
- To be able to return custom error messages (like `missing_primary_expression`)
- to the user, we need to define those error messages first. The error messages
- are represented by classes with some requirements:
- * It should have a static method called `get_value()` returning a `std::string`
- containing the description of the error.
- * It should be a [link metaprogramming_value template metaprogramming value].
- These classes are called [link parsing_error_message parsing error message]s.
- To make it easy to implement such classes and to make it difficult (if not
- impossible) to forget to fulfill a requirement, Metaparse provides a macro for
- defining these classes. To get this macro, include the following header:
- > #include <boost/metaparse/define_error.hpp>
- Let's define the [link parsing_error_message parsing error message]:
- > BOOST_METAPARSE_DEFINE_ERROR(missing_primary_expression, "Missing primary expression");
- This defines a class called `missing_primary_expression` representing this error
- message. What we need to do is make our parser return this error message when
- [link one_of `one_of`] fails.
- Let's define `plus_exp` and `paren_exp` first. Their definition does not change:
- > struct plus_exp3;
- > using paren_exp4 = middle_of<lparen_token, plus_exp3, rparen_token>;
- [link getting_started_36 copy-paste friendly version]
- When the input contains no number (parsed by `int_token`) and no paren
- expression (parsed by `paren_exp4`), we should return the
- `missing_primary_expression` error message. We can do it by adding a third
- parser to `one_of<int_token, paren_exp4, ...>` which always fails with this
- error message. Metaparse provides [link fail `fail`] for this:
- > #include <boost/metaparse/fail.hpp>
- Now we can define the `primary_exp` parser using it:
- > using primary_exp3 = one_of<int_token, paren_exp4, fail<missing_primary_expression>>;
- It adds [link fail `fail`]`<missing_primary_expression>` to `one_of` as the
- last element. Therefore if none of the "real" cases parse the input ['and] none
- of them makes any progress before failing, the error message will be
- `missing_primary_expression`.
- We need to define the rest of the parsers. Their definition is the same as
- before:
- > using unary_exp3 = \
- ...> foldr_start_with_parser< \
- ...> minus_token, \
- ...> primary_exp3, \
- ...> boost::mpl::lambda<boost::mpl::negate<boost::mpl::_1>>::type \
- ...> >;
- > using mult_exp6 = \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<times_token, divides_token>, unary_exp3>, \
- ...> unary_exp3, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- > struct plus_exp3 : \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp6>, \
- ...> mult_exp6, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > {};
- > using exp_parser20 = build_parser<plus_exp3>;
- [link getting_started_37 copy-paste friendly version]
- We can try to give our new parser an invalid input:
- > exp_parser20::apply<BOOST_METAPARSE_STRING("hello")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 1, missing_primary_expression> ....
- << compilation error >>
- The error message is now more specific to the calculator language. This covers
- only one case, where the error messages can be improved. Other cases (eg.
- missing closing parens, missing operators, etc) can be covered in a similar way.
- [endsect]
- [section 11.3. Missing closing parens]
- Missing closing parens are common errors. Let's see how our parsers report them:
- > exp_parser20::apply<BOOST_METAPARSE_STRING("(1+2")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 5, unpaired<1, 1, literal_expected<')'>>> ....
- << compilation error >>
- The parser could detect that there is a missing paren and the error report
- points to the open paren which is not closed. This looks great, but we are not
- done yet. Let's try a slightly more complex input:
- > exp_parser20::apply<BOOST_METAPARSE_STRING("0+(1+2")>::type
- mpl_::integral_c<int, 0>
- This is getting strange now. We parse the `+ <mult_exp>` elements using
- [link foldl_start_with_parser `foldl_start_with_parser`] (see the definition of
- `plus_exp3`). [link foldl_start_with_parser `foldl_start_with_parser`] parses
- the input as long as it can and stops when it fails to parse it. In the above
- input, it parses `0` as the initial element and then it tries to parse the first
- `+ <mult_exp>` element. But parsing the `<mult_exp>` part fails because of the
- missing closing paren. So
- [link foldl_start_with_parser `foldl_start_with_parser`] stops and ignores this
- failing part of the input.
- The result of the above is that we parse only the `0` part of the input, ignore
- the "garbage" at the end and assume that the value of the expression is `0`.
- This could be fixed by using [link entire_input `entire_input`]. Our parser
- would reject the input (because of the "garbage" at the end), but the error
- message would not be useful. So we take a different approach.
- When [link foldl_start_with_parser `foldl_start_with_parser`] stops, we should
- check if there is an extra broken `+ <mult_exp>` there or not. When there is, we
- should report what is wrong with that broken `+ <mult_exp>` (eg. a missing
- closing paren). Metaparse provides [link fail_at_first_char_expected
- `fail_at_first_char_expected`] to implement such validations.
- [link fail_at_first_char_expected `fail_at_first_char_expected`]`<parser>`
- checks how `parser` fails to parse the input: when it fails right at the first
- character, [link fail_at_first_char_expected `fail_at_first_char_expected`]
- assumes that there is no garbage and accepts the input. When `parser` consumes
- characters from the input before failing,
- [link fail_at_first_char_expected `fail_at_first_char_expected`] assumes that
- there is a broken expression and propagates the error. It can be used the
- following way:
- > #include <boost/metaparse/fail_at_first_char_expected.hpp>
- > #include <boost/metaparse/first_of.hpp>
- > struct plus_exp4 : \
- ...> first_of< \
- ...> foldl_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp6>, \
- ...> mult_exp6, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >, \
- ...> fail_at_first_char_expected< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp6> \
- ...> > \
- ...> > {};
- > using exp_parser21 = build_parser<plus_exp4>;
- [link getting_started_38 copy-paste friendly version]
- [link first_of `first_of`] is similar to [link middle_of `middle_of`], but
- keeps the result of the first element, not the middle one. We use it to keep the
- "real" result (the result of
- [link foldl_start_with_parser `foldl_start_with_parser`]) and to throw the dummy
- result coming from
- [link fail_at_first_char_expected `fail_at_first_char_expected`] away when
- there is no broken expression at the end. [link first_of `first_of`] propagates
- any error coming from
- [link fail_at_first_char_expected `fail_at_first_char_expected`].
- Let's try this new expression parser out with a missing closing paren:
- > exp_parser21::apply<BOOST_METAPARSE_STRING("0+(1+2")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 7, unpaired<1, 3, literal_expected<')'>>> ....
- << compilation error >>
- This works as expected now: it tells us that there is a missing paren and it
- points us to the open paren which is not closed.
- [section 11.3.1. Simplifying the parser]
- Our parser provides useful error messages for missing closing parens, however,
- the implementation of the parser (`plus_exp4`) is long and repetitive: it
- contains the parser for the repeated element
- ([link sequence `sequence`]`<`[link one_of `one_of`]`<plus_token, minus_token>, mult_exp6>`) twice, and that is not ideal.
- `plus_exp4` uses [link foldl_start_with_parser `foldl_start_with_parser`] to
- implement repetition. Metaparse provides
- [link foldl_reject_incomplete_start_with_parser `foldl_reject_incomplete_start_with_parser`]
- which does the same we did with [link first_of `first_of`],
- [link foldl_start_with_parser `foldl_start_with_parser`] and
- [link fail_at_first_char_expected `fail_at_first_char_expected`] together:
- > #include <boost/metaparse/foldl_reject_incomplete_start_with_parser.hpp>
- > struct plus_exp5 : \
- ...> foldl_reject_incomplete_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp6>, \
- ...> mult_exp6, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > {};
- > using exp_parser22 = build_parser<plus_exp5>;
- [link getting_started_39 copy-paste friendly version]
- It parses the input using
- [link sequence `sequence`]`<`[link one_of `one_of`]`<plus_token, minus_token>, mult_exp6>`
- repeatedly. When it fails,
- [link foldl_reject_incomplete_start_with_parser `foldl_reject_incomplete_start_with_parser`]
- checks if it consumed any character before failing (the same as what
- [link fail_at_first_char_expected `fail_at_first_char_expected`] does), and if
- yes, then
- [link foldl_reject_incomplete_start_with_parser `foldl_reject_incomplete_start_with_parser`]
- fails.
- This makes the implementation of the repetition with advanced error reporting
- simpler. Let's try it out:
- > exp_parser22::apply<BOOST_METAPARSE_STRING("0+(1+2")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 7, unpaired<1, 3, literal_expected<')'>>> ....
- << compilation error >>
- Note that other folding parsers have their `reject_incomplete` versions as well (eg.
- [link foldr_reject_incomplete `foldr_reject_incomplete`],
- [link foldl_reject_incomplete1 `foldl_reject_incomplete1`], etc).
- [endsect]
- [section 11.3.2. Using `foldl_reject_incomplete_start_with_parser` at other places as well]
- We have replaced one [link foldl_start_with_parser `foldl_start_with_parser`]
- with
- [link foldl_reject_incomplete_start_with_parser `foldl_reject_incomplete_start_with_parser`].
- Other layers (`mult_exp`, `unary_exp`, etc) use folding as well. Let's use it at
- all layers:
- > struct plus_exp6;
- > using paren_exp5 = middle_of<lparen_token, plus_exp6, rparen_token>;
- > using primary_exp4 = one_of<int_token, paren_exp5, fail<missing_primary_expression>>;
- > using unary_exp4 = \
- ...> foldr_start_with_parser< \
- ...> minus_token, \
- ...> primary_exp4, \
- ...> boost::mpl::lambda<boost::mpl::negate<boost::mpl::_1>>::type \
- ...> >;
- > using mult_exp7 = \
- ...> foldl_reject_incomplete_start_with_parser< \
- ...> sequence<one_of<times_token, divides_token>, unary_exp4>, \
- ...> unary_exp4, \
- ...> boost::mpl::quote2<binary_op> \
- ...> >;
- > struct plus_exp6 : \
- ...> foldl_reject_incomplete_start_with_parser< \
- ...> sequence<one_of<plus_token, minus_token>, mult_exp7>, \
- ...> mult_exp7, \
- ...> boost::mpl::quote2<binary_op> \
- ...> > {};
- > using exp_parser23 = build_parser<plus_exp6>;
- [link getting_started_40 copy-paste friendly version]
- [note
- Note that `unary_exp4` uses
- [link foldr_start_with_parser `foldr_start_with_parser`] instead of
- `foldr_reject_incomplete_start_with_parser`. The reason behind it is that there
- is no `foldr_reject_incomplete_start_with_parser`.
- [link foldr_start_with_parser `foldr_start_with_parser`] applies the
- `primary_exp4` parser when `minus_token` does not accept the input any more.
- Therefore, it is supposed to catch the errors of incomplete expressions after
- the repetition.
- ]
- Let's try different invalid expressions:
- > exp_parser23::apply<BOOST_METAPARSE_STRING("1+(2*")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 6, missing_primary_expression> ....
- << compilation error >>
- > exp_parser23::apply<BOOST_METAPARSE_STRING("1+(2*3")>::type
- << compilation error >>
- ..... x__________________PARSING_FAILED__________________x<1, 7, unpaired<1, 3, literal_expected<')'>>> ....
- << compilation error >>
- [endsect]
- [endsect]
- [endsect]
- [section 12. Summary]
- This tutorial showed you how to build a parser for a calculator language. Now
- that you understand how to do this, you should be able to use the same
- techniques and building blocks presented here to build a parser for your own
- language. You should start building the parser and once you face a problem (eg.
- you need to add parens or you need better error messages) you can always return
- to this tutorial and read the section showing you how to deal with those
- situations.
- [endsect]
- [section Copy-paste friendly code examples]
- [include getting_started_0.qbk]
- [include getting_started_1.qbk]
- [include getting_started_2.qbk]
- [include getting_started_3.qbk]
- [include getting_started_4.qbk]
- [include getting_started_5.qbk]
- [include getting_started_6.qbk]
- [include getting_started_7.qbk]
- [include getting_started_8.qbk]
- [include getting_started_9.qbk]
- [include getting_started_10.qbk]
- [include getting_started_11.qbk]
- [include getting_started_12.qbk]
- [include getting_started_13.qbk]
- [include getting_started_14.qbk]
- [include getting_started_15.qbk]
- [include getting_started_16.qbk]
- [include getting_started_17.qbk]
- [include getting_started_18.qbk]
- [include getting_started_19.qbk]
- [include getting_started_20.qbk]
- [include getting_started_21.qbk]
- [include getting_started_22.qbk]
- [include getting_started_23.qbk]
- [include getting_started_24.qbk]
- [include getting_started_25.qbk]
- [include getting_started_26.qbk]
- [include getting_started_27.qbk]
- [include getting_started_28.qbk]
- [include getting_started_29.qbk]
- [include getting_started_30.qbk]
- [include getting_started_31.qbk]
- [include getting_started_32.qbk]
- [include getting_started_33.qbk]
- [include getting_started_34.qbk]
- [include getting_started_35.qbk]
- [include getting_started_36.qbk]
- [include getting_started_37.qbk]
- [include getting_started_38.qbk]
- [include getting_started_39.qbk]
- [include getting_started_40.qbk]
- [endsect]
- [section Definitions before each section]
- [include before_3.qbk]
- [include before_3_1.qbk]
- [include before_3_2.qbk]
- [include before_3_3.qbk]
- [include before_4.qbk]
- [include before_4_1.qbk]
- [include before_4_2.qbk]
- [include before_5.qbk]
- [include before_5_1.qbk]
- [include before_5_2.qbk]
- [include before_5_2_1.qbk]
- [include before_5_2_2.qbk]
- [include before_5_2_3.qbk]
- [include before_5_2_4.qbk]
- [include before_6.qbk]
- [include before_6_1.qbk]
- [include before_6_2.qbk]
- [include before_7.qbk]
- [include before_7_1.qbk]
- [include before_7_2.qbk]
- [include before_8.qbk]
- [include before_8_1.qbk]
- [include before_8_2.qbk]
- [include before_9.qbk]
- [include before_10.qbk]
- [include before_11.qbk]
- [include before_11_1.qbk]
- [include before_11_2.qbk]
- [include before_11_3.qbk]
- [include before_11_3_1.qbk]
- [include before_11_3_2.qbk]
- [include before_12.qbk]
- [endsect]
- [endsect]
|