1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932 |
- /*=============================================================================
- Boost.Wave: A Standard compliant C++ preprocessor library
- Spirit based lexer
-
- http://www.boost.org/
- Copyright (c) 2001, Daniel C. Nuffer.
- Copyright (c) 2001-2012 Hartmut Kaiser.
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- TODO List:
- X callback objects (called when a match is made.)
- X callback passed first & last iterator, and
- a reference to a lexercontrol object that supports some
- operations on the lexer.
- set state
- terminate
- state stack (push, pop, top)
- set new token return value
- ignore the current token
- yymore
- get length of matched token
- get current lexer state
- X DFA serialization to save recomputing the DFA.
- lexer states.
- organize the file into hpp and ipp. arrange the classes in a logical order.
- use buffering - iterator only needs be an input iterator,
- lexer & callback are not templatized on iterator type, but char type.
- next_token is templatized on iterator type.
- beginning/ending contexts.
- ^ and $
- DFA minimization.
- DFA table compression.
- =============================================================================*/
- #ifndef BOOST_SPIRIT_LEXER_HPP
- #define BOOST_SPIRIT_LEXER_HPP
- ///////////////////////////////////////////////////////////////////////////////
- #include <boost/config.hpp>
- #include <boost/throw_exception.hpp>
- #include <boost/spirit/include/classic_core.hpp>
- #include <boost/spirit/include/classic_symbols.hpp>
- #include <boost/spirit/include/classic_chset.hpp>
- #include <boost/spirit/include/classic_escape_char.hpp>
- #include <set>
- #include <map>
- #include <memory> // for auto_ptr/unique_ptr
- #include <vector>
- #include <stack>
- #include <utility> // for pair
- #include <iostream>
- #include <fstream>
- #include <boost/assert.hpp>
- #include <boost/limits.hpp>
- #if defined(BOOST_NO_STD_ITERATOR_TRAITS)
- #define BOOST_SPIRIT_IT_NS impl
- #else
- #define BOOST_SPIRIT_IT_NS std
- #endif
- ///////////////////////////////////////////////////////////////////////////////
- namespace boost {
- namespace spirit {
- namespace classic {
- typedef unsigned char uchar;
- typedef unsigned int node_id_t;
- const node_id_t invalid_node = node_id_t(-1);
- typedef std::set<node_id_t> node_set;
- typedef std::vector<uchar> uchar_vector;
- typedef std::map<node_id_t, node_set> followpos_t;
- typedef std::vector<uchar_vector> state_match_t;
- template <typename TokenT>
- class lexer_control;
- class bad_regex : public std::exception
- {
- };
- namespace lexerimpl
- {
- class node
- {
- public:
- virtual ~node() {}
- virtual node* clone() const = 0;
- virtual bool nullable() const = 0;
- virtual node_set firstpos() const = 0;
- virtual node_set lastpos() const = 0;
- virtual void compute_followpos(followpos_t& followpos) const = 0;
- virtual void compute_state_match(state_match_t& state_match) const = 0;
- virtual void get_eof_ids(node_set& eof_set) const = 0;
- virtual void assign_node_ids(node_id_t& node_count) = 0;
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const = 0;
- #endif
- };
- class char_node : public node
- {
- public:
- char_node(const uchar c);
- char_node(const char_node& x);
- virtual ~char_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- uchar m_char;
- node_id_t m_node_num;
- };
- inline
- char_node::char_node(const uchar c)
- : node()
- , m_char(c)
- , m_node_num(0)
- {
- }
- inline
- char_node::char_node(const char_node& x)
- : node(x)
- , m_char(x.m_char)
- , m_node_num(x.m_node_num)
- {
- }
- inline node *
- char_node::clone() const
- {
- return new char_node(*this);
- }
- inline bool
- char_node::nullable() const
- {
- return false;
- }
- inline node_set
- char_node::firstpos() const
- {
- node_set rval;
- rval.insert(m_node_num);
- return rval;
- }
- inline node_set
- char_node::lastpos() const
- {
- return firstpos();
- }
- inline void
- char_node::compute_followpos(followpos_t&) const
- {
- return;
- }
- inline void
- char_node::compute_state_match(state_match_t& state_match) const
- {
- if (state_match.size() < m_node_num + 1)
- state_match.resize(m_node_num + 1);
- state_match[m_node_num].resize(256);
- state_match[m_node_num][m_char] = 1;
- }
- inline void
- char_node::get_eof_ids(node_set&) const
- {
- return;
- }
- inline void
- char_node::assign_node_ids(node_id_t& node_count)
- {
- m_node_num = node_count++;
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- char_node::dump(std::ostream& out) const
- {
- out << "\nchar_node m_char = " << m_char;
- out << " m_node_num = " << m_node_num;
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- }
- #endif
- class epsilon_node : public node
- {
- public:
- epsilon_node();
- epsilon_node(const epsilon_node& x);
- virtual ~epsilon_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- node_id_t m_node_num;
- };
- inline
- epsilon_node::epsilon_node()
- : node()
- , m_node_num(0)
- {
- }
- inline
- epsilon_node::epsilon_node(const epsilon_node& x)
- : node(x)
- , m_node_num(x.m_node_num)
- {
- }
- inline node *
- epsilon_node::clone() const
- {
- return new epsilon_node(*this);
- }
- inline bool
- epsilon_node::nullable() const
- {
- return true;
- }
- inline node_set
- epsilon_node::firstpos() const
- {
- return node_set();
- }
- inline node_set
- epsilon_node::lastpos() const
- {
- return node_set();
- }
- inline void
- epsilon_node::compute_followpos(followpos_t&) const
- {
- return;
- }
- inline void
- epsilon_node::compute_state_match(state_match_t& state_match) const
- {
- if (state_match.size() < m_node_num + 1)
- state_match.resize(m_node_num + 1);
- state_match[m_node_num].resize(256, 1);
- }
- inline void
- epsilon_node::get_eof_ids(node_set&) const
- {
- return;
- }
- inline void
- epsilon_node::assign_node_ids(node_id_t& node_count)
- {
- m_node_num = node_count++;
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- epsilon_node::dump(std::ostream& out) const
- {
- out << "\nepsilon_node";
- out << " m_node_num = " << m_node_num;
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- }
- #endif
- class or_node : public node
- {
- public:
- or_node(node* left, node* right);
- or_node(const or_node& x);
- virtual ~or_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> m_left;
- std::unique_ptr<node> m_right;
- #else
- std::auto_ptr<node> m_left;
- std::auto_ptr<node> m_right;
- #endif
- };
- inline
- or_node::or_node(node* left, node* right)
- : node()
- , m_left(left)
- , m_right(right)
- {
- }
- inline
- or_node::or_node(const or_node& x)
- : node(x)
- , m_left(x.m_left->clone())
- , m_right(x.m_right->clone())
- {
- }
- inline node *
- or_node::clone() const
- {
- return new or_node(m_left->clone(), m_right->clone());
- }
- inline bool
- or_node::nullable() const
- {
- return m_left->nullable() || m_right->nullable();
- }
- inline node_set
- or_node::firstpos() const
- {
- node_set rval;
- node_set l = m_left->firstpos();
- node_set r = m_right->firstpos();
- std::set_union(l.begin(), l.end(), r.begin(), r.end(),
- std::inserter(rval, rval.begin()));
- return rval;
- }
- inline node_set
- or_node::lastpos() const
- {
- node_set rval;
- node_set l = m_left->lastpos();
- node_set r = m_right->lastpos();
- std::set_union(l.begin(), l.end(), r.begin(), r.end(),
- std::inserter(rval, rval.begin()));
- return rval;
- }
- inline void
- or_node::compute_followpos(followpos_t& followpos) const
- {
- m_left->compute_followpos(followpos);
- m_right->compute_followpos(followpos);
- }
- inline void
- or_node::compute_state_match(state_match_t& state_match) const
- {
- m_left->compute_state_match(state_match);
- m_right->compute_state_match(state_match);
- }
- inline void
- or_node::get_eof_ids(node_set& eof_nodes) const
- {
- m_left->get_eof_ids(eof_nodes);
- m_right->get_eof_ids(eof_nodes);
- }
- inline void
- or_node::assign_node_ids(node_id_t& node_count)
- {
- m_left->assign_node_ids(node_count);
- m_right->assign_node_ids(node_count);
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- or_node::dump(std::ostream& out) const
- {
- m_left->dump(out);
- out << "\nor_node";
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- m_right->dump(out);
- }
- #endif
- class cat_node : public node
- {
- public:
- cat_node(node* left, node* right);
- cat_node(const cat_node& x);
- virtual ~cat_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> m_left;
- std::unique_ptr<node> m_right;
- #else
- std::auto_ptr<node> m_left;
- std::auto_ptr<node> m_right;
- #endif
- };
- inline
- cat_node::cat_node(node* left, node* right)
- : node()
- , m_left(left)
- , m_right(right)
- {
- }
- inline
- cat_node::cat_node(const cat_node& x)
- : node(x)
- , m_left(x.m_left->clone())
- , m_right(x.m_right->clone())
- {
- }
- inline node *
- cat_node::clone() const
- {
- return new cat_node(m_left->clone(), m_right->clone());
- }
- inline bool
- cat_node::nullable() const
- {
- return m_left->nullable() && m_right->nullable();
- }
- inline node_set
- cat_node::firstpos() const
- {
- if (m_left->nullable())
- {
- node_set rval;
- node_set l = m_left->firstpos();
- node_set r = m_right->firstpos();
- std::set_union(l.begin(), l.end(), r.begin(), r.end(),
- std::inserter(rval, rval.begin()));
- return rval;
- }
- else
- {
- return m_left->firstpos();
- }
- }
- inline node_set
- cat_node::lastpos() const
- {
- if (m_right->nullable())
- {
- node_set rval;
- node_set l = m_left->lastpos();
- node_set r = m_right->lastpos();
- std::set_union(l.begin(), l.end(), r.begin(), r.end(),
- std::inserter(rval, rval.begin()));
- return rval;
- }
- else
- {
- return m_right->lastpos();
- }
- }
- inline void
- cat_node::compute_followpos(followpos_t& followpos) const
- {
- node_set l = m_left->lastpos();
- for (node_set::iterator i = l.begin();
- i != l.end();
- ++i)
- {
- node_set rf = m_right->firstpos();
- followpos[*i].insert(rf.begin(), rf.end());
- }
- m_left->compute_followpos(followpos);
- m_right->compute_followpos(followpos);
- }
- inline void
- cat_node::compute_state_match(state_match_t& state_match) const
- {
- m_left->compute_state_match(state_match);
- m_right->compute_state_match(state_match);
- }
- inline void
- cat_node::get_eof_ids(node_set& eof_nodes) const
- {
- m_left->get_eof_ids(eof_nodes);
- m_right->get_eof_ids(eof_nodes);
- }
- inline void
- cat_node::assign_node_ids(node_id_t& node_count)
- {
- m_left->assign_node_ids(node_count);
- m_right->assign_node_ids(node_count);
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- cat_node::dump(std::ostream& out) const
- {
- m_left->dump(out);
- out << "\ncat_node";
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- m_right->dump(out);
- }
- #endif
- class star_node : public node
- {
- public:
- star_node(node* left);
- star_node(const star_node& x);
- virtual ~star_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> m_left;
- #else
- std::auto_ptr<node> m_left;
- #endif
- };
- inline
- star_node::star_node(node* left)
- : node()
- , m_left(left)
- {
- }
- inline
- star_node::star_node(const star_node& x)
- : node(x)
- , m_left(x.m_left->clone())
- {
- }
- inline node *
- star_node::clone() const
- {
- return new star_node(m_left->clone());
- }
- inline bool
- star_node::nullable() const
- {
- return true;
- }
- inline node_set
- star_node::firstpos() const
- {
- return m_left->firstpos();
- }
- inline node_set
- star_node::lastpos() const
- {
- return m_left->lastpos();
- }
- inline void
- star_node::compute_followpos(followpos_t& followpos) const
- {
- node_set lp = this->lastpos();
- for (node_set::iterator i = lp.begin();
- i != lp.end();
- ++i)
- {
- node_set fp = this->firstpos();
- followpos[*i].insert(fp.begin(), fp.end());
- }
- m_left->compute_followpos(followpos);
- }
- inline void
- star_node::compute_state_match(state_match_t& state_match) const
- {
- m_left->compute_state_match(state_match);
- }
- inline void
- star_node::get_eof_ids(node_set& eof_nodes) const
- {
- m_left->get_eof_ids(eof_nodes);
- }
- inline void
- star_node::assign_node_ids(node_id_t& node_count)
- {
- m_left->assign_node_ids(node_count);
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- star_node::dump(std::ostream& out) const
- {
- m_left->dump(out);
- out << "\nstar_node";
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- }
- #endif
- class eof_node : public node
- {
- public:
- eof_node();
- eof_node(const eof_node& x);
- virtual ~eof_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- node_id_t m_node_num;
- };
- inline
- eof_node::eof_node()
- : node()
- , m_node_num(0)
- {
- }
- inline
- eof_node::eof_node(const eof_node& x)
- : node(x)
- , m_node_num(x.m_node_num)
- {
- }
- inline node *
- eof_node::clone() const
- {
- return new eof_node(*this);
- }
- inline bool
- eof_node::nullable() const
- {
- return false;
- }
- inline node_set
- eof_node::firstpos() const
- {
- node_set rval;
- rval.insert(m_node_num);
- return rval;
- }
- inline node_set
- eof_node::lastpos() const
- {
- node_set rval;
- rval.insert(m_node_num);
- return rval;
- }
- inline void
- eof_node::compute_followpos(followpos_t&) const
- {
- return;
- }
- inline void
- eof_node::compute_state_match(state_match_t& state_match) const
- {
- if (state_match.size() < m_node_num + 1)
- state_match.resize(m_node_num + 1);
- state_match[m_node_num].resize(256, 0);
- }
- inline void
- eof_node::get_eof_ids(node_set& eof_nodes) const
- {
- eof_nodes.insert(m_node_num);
- }
- inline void
- eof_node::assign_node_ids(node_id_t& node_count)
- {
- m_node_num = node_count++;
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- eof_node::dump(std::ostream& out) const
- {
- out << "\neof_node";
- out << " m_node_num = " << m_node_num;
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- }
- #endif
- class ccl_node : public node
- {
- public:
- ccl_node(const std::vector<uchar>& v);
- ccl_node(const uchar c1, const uchar c2);
- ccl_node(const ccl_node& x);
- virtual ~ccl_node(){}
- virtual node* clone() const;
- virtual bool nullable() const;
- virtual node_set firstpos() const;
- virtual node_set lastpos() const;
- virtual void compute_followpos(followpos_t& followpos) const;
- virtual void compute_state_match(state_match_t& state_match ) const;
- virtual void get_eof_ids(node_set& eof_set) const;
- virtual void assign_node_ids(node_id_t& node_count);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- virtual void dump(std::ostream& out) const;
- #endif
- private:
- std::vector<uchar> m_match;
- node_id_t m_node_num;
- };
- inline
- ccl_node::ccl_node(const std::vector<uchar>& v)
- : node()
- , m_match(v)
- , m_node_num(0)
- {
- m_match.resize(256); // make sure it's the right size
- }
- inline
- ccl_node::ccl_node(const uchar c1, const uchar c2)
- : node()
- , m_match(256, uchar(0))
- , m_node_num(0)
- {
- BOOST_ASSERT(c1 < c2);
- for (std::size_t i = c1; i <= std::size_t(c2); ++i)
- {
- m_match[i] = 1;
- }
- }
- inline
- ccl_node::ccl_node(const ccl_node& x)
- : node(x)
- , m_match(x.m_match)
- , m_node_num(x.m_node_num)
- {
- }
- inline node *
- ccl_node::clone() const
- {
- return new ccl_node(*this);
- }
- inline bool
- ccl_node::nullable() const
- {
- return false;
- }
- inline node_set
- ccl_node::firstpos() const
- {
- node_set rval;
- rval.insert(m_node_num);
- return rval;
- }
- inline node_set
- ccl_node::lastpos() const
- {
- return firstpos();
- }
- inline void
- ccl_node::compute_followpos(followpos_t&) const
- {
- return;
- }
- inline void
- ccl_node::compute_state_match(state_match_t& state_match) const
- {
- if (state_match.size() < m_node_num + 1)
- state_match.resize(m_node_num + 1);
- state_match[m_node_num] = m_match;
- }
- inline void
- ccl_node::get_eof_ids(node_set&) const
- {
- return;
- }
- inline void
- ccl_node::assign_node_ids(node_id_t& node_count)
- {
- m_node_num = node_count++;
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- inline void
- ccl_node::dump(std::ostream& out) const
- {
- out << "\nccl_node m_match = ";
- for (std::size_t i = 0; i < m_match.size(); ++i)
- {
- if (m_match[i])
- out << i << ", ";
- }
- out << " m_node_num = " << m_node_num;
- out << " nullable() = " << (nullable() ? "true" : "false");
- out << " firstpos() = ";
- node_set fp = firstpos();
- std::copy(fp.begin(), fp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- out << " lastpos() = ";
- node_set lp = lastpos();
- std::copy(lp.begin(), lp.end(),
- std::ostream_iterator<node_id_t>(out, ","));
- }
- #endif
- template <typename ScannerT>
- class make_concat
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- make_concat(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const &, iterator_type const &) const
- {
- node* right = m_stack.top();
- m_stack.pop();
- node* left = m_stack.top();
- m_stack.pop();
- node* newnode = new cat_node(left, right);
- m_stack.push(newnode);
- }
- std::stack<node*>& m_stack;
- };
- template <int CharTSize>
- struct get_byte_aux;
- template<>
- struct get_byte_aux<1>
- {
- template <typename CharT>
- unsigned char operator()(CharT c, unsigned int byte)
- {
- BOOST_ASSERT(byte == 0);
- return c;
- }
- };
- template<>
- struct get_byte_aux<2>
- {
- template <typename CharT>
- unsigned char operator()(CharT c, unsigned int byte)
- {
- static unsigned long mask[] =
- {
- 0xFF00,
- 0x00FF
- };
- BOOST_ASSERT(byte < 2);
- return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
- }
- };
- template<>
- struct get_byte_aux<4>
- {
- template <typename CharT>
- unsigned char operator()(CharT c, unsigned int byte)
- {
- static unsigned long mask[] =
- {
- 0xFF000000,
- 0x00FF0000,
- 0x0000FF00,
- 0x000000FF
- };
- BOOST_ASSERT(byte < 4);
- return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
- }
- };
- template <typename CharT>
- inline unsigned char
- get_byte(CharT c, unsigned int byte)
- {
- return get_byte_aux<sizeof(c)>()(c, byte);
- }
- template <typename ScannerT>
- class make_star
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- make_star(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(const char_t) const
- {
- node* left = m_stack.top();
- m_stack.pop();
- node* newnode = new star_node(left);
- m_stack.push(newnode);
- }
- std::stack<node*>& m_stack;
- };
- template <typename ScannerT>
- class make_or
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- make_or(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const&, iterator_type const&) const
- {
- node* right = m_stack.top();
- m_stack.pop();
- node* left = m_stack.top();
- m_stack.pop();
- node* newnode = new or_node(left, right);
- m_stack.push(newnode);
- }
- std::stack<node*>& m_stack;
- };
- template <typename ScannerT>
- class make_plus
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- make_plus(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(const char_t) const
- {
- node* left = m_stack.top();
- m_stack.pop();
- node* copy = left->clone();
- node* new_star = new star_node(copy);
- node* new_cat = new cat_node(left, new_star);
- m_stack.push(new_cat);
- }
- std::stack<node*>& m_stack;
- };
- template <typename ScannerT>
- class make_optional
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- make_optional(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(const char_t) const
- {
- node* left = m_stack.top();
- m_stack.pop();
- node* new_or = new or_node(left, new epsilon_node());
- m_stack.push(new_or);
- }
- std::stack<node*>& m_stack;
- };
- ///////////////////////////////////////////////////////////////////////////////
- // utility function
- template <typename CharT>
- inline utility::impl::range<CharT> const&
- full_range()
- {
- static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
- (std::numeric_limits<CharT>::max)());
- return full;
- }
- namespace ccl_utils
- {
- template <typename char_t>
- inline utility::impl::range_run<char_t>
- negate_range_run(
- const utility::impl::range_run<char_t>& rr)
- {
- utility::impl::range_run<char_t> newrr;
- newrr.set(full_range<char_t>());
- for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
- iter != rr.end(); ++iter)
- newrr.clear(*iter);
- return newrr;
- }
- template <typename char_t>
- inline node*
- create_mb_node_seq(char_t c)
- {
- node* newnode = new char_node(get_byte(c, 0));
- for (unsigned int i = 1; i < sizeof(c); ++i)
- {
- node* cnode = new char_node(get_byte(c, i));
- node* top_node = new cat_node(newnode, cnode);
- newnode = top_node;
- }
- return newnode;
- }
- template <typename char_t>
- inline void
- handle_mb_char(char_t c, bool first_time,
- std::stack<node*>& stack)
- {
- node* newnode = create_mb_node_seq(c);
- if (first_time)
- {
- stack.push(newnode);
- }
- else
- {
- node* top = stack.top();
- stack.pop();
- node* newtop = new or_node(top, newnode);
- stack.push(newtop);
- }
- }
- // forward decl only
- template <typename char_t>
- inline void
- handle_mb_range(char_t c1, char_t c2, bool first_time,
- std::stack<node*>& stack);
- template <typename char_t>
- inline void
- create_nodes(const utility::impl::range_run<char_t>& rr,
- std::stack<node*>& stack)
- {
- if (sizeof(char_t) == 1)
- {
- std::vector<uchar> ccl;
- ccl.resize(256);
- for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
- iter != rr.end(); ++iter)
- {
- for (int i = iter->first; i <= iter->last; ++i)
- {
- // this is always true because of the limited datatype
- // BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
- ccl[uchar(i)] = 1;
- }
- }
- node* new_ccl = new ccl_node(ccl);
- stack.push(new_ccl);
- }
- else
- {
- bool mb_first_time = true;
- for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
- iter != rr.end(); ++iter)
- {
- if (iter->first == iter->last)
- {
- handle_mb_char(iter->first, mb_first_time, stack);
- }
- else
- {
- handle_mb_range(iter->first, iter->last, mb_first_time, stack);
- }
- mb_first_time = false;
- }
- }
- }
- template <typename char_t>
- inline std::size_t
- compute_differing_byte(char_t c1, char_t c2)
- {
- std::size_t rval = 0;
- while (rval < sizeof(c1) &&
- get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
- {
- ++rval;
- }
- return rval;
- }
- template <typename char_t>
- inline node*
- create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
- {
- std::size_t diff = get_byte(c2, (unsigned int)j) -
- get_byte(c1, (unsigned int)j);
- if (diff == 1) {
- return 0;
- }
- else if (diff == 2) {
- return new char_node(get_byte(c1, (unsigned int)j)+1);
- }
- else {
- return new ccl_node(get_byte(c1, (unsigned int)j)+1,
- get_byte(c2, (unsigned int)j)-1);
- }
- }
- template <typename char_t>
- inline node *
- create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
- std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
- {
- node* cnode;
- if (i == sizem1 && j == differing_byte && j != sizem1)
- {
- node* tmp = create_mb_node_type1(j, c1, c2);
- if (tmp == 0)
- {
- delete newnode;
- return 0;
- }
- else
- cnode = tmp;
- }
- else if (i == differing_byte && j == sizem1)
- {
- if (i != sizem1) {
- cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
- }
- else {
- cnode = new ccl_node(get_byte(c1, (unsigned int)j),
- get_byte(c2, (unsigned int)j));
- }
- }
- else if (i != differing_byte && i != sizem1 &&
- j == (sizem1 - i + differing_byte))
- {
- cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
- }
- else if (i + j - differing_byte > sizem1) {
- cnode = new ccl_node(0, 0xFF);
- }
- else {//if (is plain)
- cnode = new char_node(get_byte(c1, (unsigned int)j));
- }
- node* top_node = new cat_node(newnode, cnode);
- return top_node;
- }
- // On platforms, where wchar_t is a typedef for unsigned short, the
- // comparision for a negative value is pointless
- template <bool is_signed>
- struct correct_char_aux {
- };
- template <>
- struct correct_char_aux<true> {
- template <typename char_t>
- static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
- };
- template <>
- struct correct_char_aux<false> {
- template <typename char_t>
- static char_t correct(char_t c) { return c; }
- };
- template <typename char_t>
- struct correct_char
- {
- static char_t correct(char_t c)
- {
- return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
- correct(c);
- }
- };
- template <typename char_t>
- inline void
- handle_mb_range(char_t c1, char_t c2, bool first_time,
- std::stack<node*>& stack)
- {
- // The algorithm can't handle negative value chars, which don't make
- // much sense anyway. This comparision is pointless for wchar_t's on
- // platforms, where wchar_t is a typedef for unsigned short
- c1 = correct_char<char_t>::correct(c1);
- //if (c1 < 0)
- // c1 = 0;
- BOOST_ASSERT(c1 < c2);
- node* newnode = 0;
- node* savednode = 0;
- const std::size_t differing_byte = compute_differing_byte(c1, c2);
- const std::size_t sizem1 = sizeof(c1) - 1;
- const std::size_t ndb = sizem1 - differing_byte;
- for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
- {
- // generate node for the first byte
- if (differing_byte == 0 && i == ndb)
- {
- node* tmp = create_mb_node_type1(0, c1, c2);
- if (tmp == 0)
- continue;
- else
- newnode = tmp;
- }
- else
- {
- newnode = new char_node(get_byte(c1, 0));
- }
- for (std::size_t j = 1; j < sizeof(c1); ++j)
- {
- newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
- c1, c2, newnode);
- if (newnode == 0)
- goto end_outer_for;
- }
- // or together the various parts
- if (savednode)
- {
- node* top_node = new or_node(savednode, newnode);
- savednode = top_node;
- }
- else
- {
- savednode = newnode;
- }
- end_outer_for:
- continue;
- }
- for (std::size_t k = 0; k < ndb; ++k)
- {
- newnode = new char_node(get_byte(c2, 0));
- for (std::size_t j = 1; j < sizeof(c2); ++j)
- {
- node* cnode;
- if (k == differing_byte && j == sizem1 && k != sizem1)
- cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
- else if (k != differing_byte && k != sizem1 &&
- j == (sizem1 - k + differing_byte))
- cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
- else if (k + j - differing_byte > sizem1)
- cnode = new ccl_node(0, 0xFF);
- else //if (is plain)
- cnode = new char_node(get_byte(c2, (unsigned int)j));
- node* top_node = new cat_node(newnode, cnode);
- newnode = top_node;
- }
- // or together the various parts
- if (savednode)
- {
- node* top_node = new or_node(savednode, newnode);
- savednode = top_node;
- }
- else
- {
- savednode = newnode;
- }
- }
- if (first_time)
- {
- stack.push(savednode);
- }
- else
- {
- node* top = stack.top();
- stack.pop();
- node* newtop = new or_node(top, savednode);
- stack.push(newtop);
- }
- }
- } // namespace ccl_utils
- template <typename ScannerT>
- class make_char
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- make_char(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const& first, iterator_type const& last) const
- {
- const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
- escape_char_parser<lex_escapes, char_t>();
- char_t the_char;
- iterator_type first_ = first;
- ScannerT scan(first_, last);
- lex_escape_ch[assign(the_char)].parse(scan);
- node* newnode = ccl_utils::create_mb_node_seq(the_char);
- m_stack.push(newnode);
- }
- std::stack<node*>& m_stack;
- };
- template <typename ScannerT>
- class make_ccl
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- make_ccl(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- static bool is_equal_to_string(iterator_type first,
- iterator_type const & last, const char* str)
- {
- while (first != last &&*str &&*first ==*str)
- {
- ++first;
- ++str;
- }
- return*str == 0;
- }
- template <typename ParserT>
- static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
- {
- for (int i = 0; i < 256; ++i)
- {
- if (parser.test(static_cast<char_t>(uchar(i))))
- rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
- }
- }
- void operator()(iterator_type const& first_, iterator_type const& last) const
- {
- BOOST_ASSERT(*first_ == '[');
- iterator_type first = first_;
- ++first; // skip over '['
- bool negated_ccl = false;
- if (*first == '^')
- {
- negated_ccl = true;
- ++first;
- }
- utility::impl::range_run<char_t> rr;
- while (first != last &&*first != ']')
- {
- if (*first == '[') // it's a ccl_expr like [:space:]
- {
- // check for [:space:], etc.
- if (is_equal_to_string(first, last, "[:alnum:]"))
- {
- fill_ccl(rr, alnum_p);
- }
- else if (is_equal_to_string(first, last, "[:alpha:]"))
- {
- fill_ccl(rr, alpha_p);
- }
- else if (is_equal_to_string(first, last, "[:blank:]"))
- {
- fill_ccl(rr, blank_p);
- }
- else if (is_equal_to_string(first, last, "[:cntrl:]"))
- {
- fill_ccl(rr, cntrl_p);
- }
- else if (is_equal_to_string(first, last, "[:digit:]"))
- {
- fill_ccl(rr, digit_p);
- }
- else if (is_equal_to_string(first, last, "[:graph:]"))
- {
- fill_ccl(rr, graph_p);
- }
- else if (is_equal_to_string(first, last, "[:lower:]"))
- {
- fill_ccl(rr, lower_p);
- }
- else if (is_equal_to_string(first, last, "[:print:]"))
- {
- fill_ccl(rr, print_p);
- }
- else if (is_equal_to_string(first, last, "[:punct:]"))
- {
- fill_ccl(rr, punct_p);
- }
- else if (is_equal_to_string(first, last, "[:space:]"))
- {
- fill_ccl(rr, space_p);
- }
- else if (is_equal_to_string(first, last, "[:upper:]"))
- {
- fill_ccl(rr, upper_p);
- }
- else if (is_equal_to_string(first, last, "[:xdigit:]"))
- {
- fill_ccl(rr, xdigit_p);
- }
- // this can't happen, because it's parsed before we get here.
- //else
- // throw bad_regex();
- // Advance past the character class expression
- while (first != last &&*first != ']')
- ++first;
- BOOST_ASSERT(*first == ']');
- ++first;
- }
- else {
- const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
- escape_char_parser<lex_escapes, char_t>();
- char_t c1;
- ScannerT scan(first, last);
- lex_escape_ch[assign(c1)].parse(scan);
- if (*scan.first == '-') // insert a range
- {
- ++scan.first;
- char_t c2;
- lex_escape_ch[assign(c2)].parse(scan);
- BOOST_ASSERT(c1 < c2); // Throw exception?
- rr.set(utility::impl::range<char_t>(c1, c2));
- }
- else // insert 1 char
- {
- rr.set(utility::impl::range<char_t>(c1, c1));
- }
- }
- }
- if (negated_ccl)
- {
- rr = ccl_utils::negate_range_run(rr);
- }
- ccl_utils::create_nodes(rr, m_stack);
- }
- std::stack<node*>& m_stack;
- };
- template <typename ScannerT>
- class make_any_char
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- std::stack<node*>& m_stack;
- make_any_char(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(const char_t c) const
- {
- BOOST_ASSERT(c == '.');
- do_any_char();
- }
- void do_any_char() const
- {
- static utility::impl::range_run<char_t> rr;
- rr.set(full_range<char_t>());
- char_t newline = '\n';
- rr.clear(utility::impl::range<char_t>(newline, newline));
- ccl_utils::create_nodes(rr, m_stack);
- }
- };
- template <typename ScannerT>
- class make_string
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- std::stack<node*>& m_stack;
- make_string(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const& first, iterator_type const& last) const
- {
- BOOST_ASSERT(*first == '"');
- iterator_type first_ = first;
- ScannerT scan(first_, last);
- ++scan.first; // skip over '"'
- // empty string not allowed
- if (*scan.first == '"')
- {
- boost::throw_exception(bad_regex());
- }
- const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
- escape_char_parser<lex_escapes, char_t>();
- char_t c;
- lex_escape_ch[assign(c)].parse(scan);
- node* top_node = ccl_utils::create_mb_node_seq(c);
- while (*scan.first != '"' && scan.first != scan.last)
- {
- lex_escape_ch[assign(c)].parse(scan);
- node* cur_node = ccl_utils::create_mb_node_seq(c);
- top_node = new cat_node(top_node, cur_node);
- }
- m_stack.push(top_node);
- }
- };
- inline
- node* repeat_node(node* n, int num)
- {
- node* list_of_nodes = n;
- for (int i = 1; i < num; ++i)
- {
- list_of_nodes = new cat_node(list_of_nodes, n->clone());
- }
- return list_of_nodes;
- }
- inline
- node* optional_node(node* n)
- {
- return new or_node(n, new epsilon_node());
- }
- template <typename ScannerT>
- class make_rep1
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- std::stack<node*>& m_stack;
- make_rep1(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const& first, iterator_type const& last) const
- {
- BOOST_ASSERT(*first == '{');
- iterator_type first_ = first;
- ScannerT scan(first_, last);
- ++scan.first; // skip over '{'
- unsigned int count;
- uint_p[assign(count)].parse(scan);
- if (count == 0)
- boost::throw_exception(bad_regex());
- node* top_node = m_stack.top();
- m_stack.pop();
- top_node = repeat_node(top_node, count);
- m_stack.push(top_node);
- }
- };
- template <typename ScannerT>
- class make_rep2
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- std::stack<node*>& m_stack;
- make_rep2(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const& first, iterator_type const& last) const
- {
- BOOST_ASSERT(*first == '{');
- iterator_type first_ = first;
- ScannerT scan (first_, last);
- ++scan.first; // skip over '{'
- unsigned int count;
- uint_p[assign(count)].parse(scan);
- if (count == 0)
- boost::throw_exception(bad_regex());
- node* top_node = m_stack.top();
- m_stack.pop();
- top_node = new cat_node(repeat_node(top_node, count),
- new star_node(top_node->clone()));
- m_stack.push(top_node);
- }
- };
- template <typename ScannerT>
- class make_rep3
- {
- typedef typename ScannerT::iterator_t iterator_type;
- public:
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- std::stack<node*>& m_stack;
- make_rep3(std::stack<node*>& the_stack)
- : m_stack(the_stack)
- {}
- void operator()(iterator_type const& first, iterator_type const& last) const
- {
- BOOST_ASSERT(*first == '{');
- iterator_type first_ = first;
- ScannerT scan(first_, last);
- ++scan.first; // skip over '{'
- unsigned int count1, count2;
- uint_p[assign(count1)].parse(scan);
- if (count1 == 0)
- boost::throw_exception(bad_regex());
- ++scan.first; // skip over ','
- uint_p[assign(count2)].parse(scan);
- if (count2 <= count1)
- boost::throw_exception(bad_regex());
- node* top_node = m_stack.top();
- m_stack.pop();
- node* repeats = repeat_node(top_node, count1);
- top_node = new cat_node(repeats,
- repeat_node(optional_node(top_node->clone()),
- count2 - count1));
- m_stack.push(top_node);
- }
- };
- ///////////////////////////////////////////////////////////////////////////////
- //
- // Lexer grammar
- //
- // Defines the grammar, which mandates the syntax of the understood
- // lexeme definitions passed to lexer::register_regex.
- //
- ///////////////////////////////////////////////////////////////////////////////
- class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
- {
- public:
- lexer_grammar(std::stack<node*> &node_stack_)
- : node_stack(node_stack_) {}
- template <typename ScannerT>
- struct definition
- {
- typedef rule<ScannerT> rule_t;
- typedef typename ScannerT::iterator_t iterator_type;
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
- char_t;
- rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
- escseq, ccl_char;
- symbols<> ccl_expr;
- definition(lexer_grammar const &self)
- {
- regex =
- re >> !('/' >> re) >> !ch_p('$')
- ;
- re =
- series
- >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
- ;
- series =
- singleton
- >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
- ;
- singleton =
- ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
- >> singleton2
- | fullccl
- >> singleton2
- | ('"' >> string >> '"')
- [
- make_string<ScannerT>(self.node_stack)
- ]
- >> singleton2
- | '(' >> re >> ')'
- >> singleton2
- | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
- [
- make_char<ScannerT>(self.node_stack)
- ]
- >> singleton2
- ;
- singleton2 =
- ch_p('*')[make_star<ScannerT>(self.node_stack)]
- >> singleton2
- | ch_p('+')[make_plus<ScannerT>(self.node_stack)]
- >> singleton2
- | ch_p('?')[make_optional<ScannerT>(self.node_stack)]
- >> singleton2
- | ('{' >> uint_p >> '}')
- [
- make_rep1<ScannerT>(self.node_stack)
- ]
- >> singleton2
- | ('{' >> uint_p >> ',' >> '}')
- [
- make_rep2<ScannerT>(self.node_stack)
- ]
- >> singleton2
- | ('{' >> uint_p >> ',' >> uint_p >> '}')
- [
- make_rep3<ScannerT>(self.node_stack)
- ]
- >> singleton2
- | epsilon_p
- ;
- fullccl =
- ('[' >> !ch_p('^') >> ccl >> ']')
- [
- make_ccl<ScannerT>(self.node_stack)
- ]
- ;
- ccl =
- *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
- ;
- ccl_char =
- ( (anychar_p - chset<>("\\\n]")) | escseq )
- ;
- ccl_expr =
- "[:alnum:]",
- "[:alpha:]",
- "[:blank:]",
- "[:cntrl:]",
- "[:digit:]",
- "[:graph:]",
- "[:lower:]",
- "[:print:]",
- "[:punct:]",
- "[:space:]",
- "[:upper:]",
- "[:xdigit:]"
- ;
- string =
- +( (anychar_p - chset<>("\"\\")) | escseq )
- ;
- typedef
- uint_parser<char_t, 8, 1,
- std::numeric_limits<char_t>::digits / 3 + 1
- > oct_parser_t;
- typedef
- uint_parser<char_t, 16, 1,
- std::numeric_limits<char_t>::digits / 4 + 1
- > hex_parser_t;
- escseq =
- ch_p('\\')
- >> (
- oct_parser_t()
- | as_lower_d['x'] >> hex_parser_t()
- | (anychar_p - chset<>('\n'))
- )
- ;
- #define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
- BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
-
- #undef BOOST_SLEX_DEBUG
- }
- rule<ScannerT> const&
- start() const { return regex; }
- };
- std::stack<node*> &node_stack;
- }; // class lexer_grammar
- template <typename StringT>
- inline node *
- parse(lexer_grammar& g, StringT const& str)
- {
- typedef
- scanner<typename StringT::const_iterator, scanner_policies<> >
- scanner_t;
- typedef typename rule<scanner_t>::template result<scanner_t>::type
- result_t;
-
- typename StringT::const_iterator first = str.begin();
- typename StringT::const_iterator last = str.end();
- scanner_t scan(first, last);
- // typename rule<scanner_t>::result_t hit = g.parse(scan);
- result_t hit = g.parse(scan);
- if (!hit || !scan.at_end())
- {
- while (g.node_stack.size())
- {
- delete g.node_stack.top();
- g.node_stack.pop();
- }
- return 0;
- }
- BOOST_ASSERT(g.node_stack.size() == 1);
- node* rval = g.node_stack.top();
- g.node_stack.pop();
- node* an_eof_node = new eof_node();
- rval = new cat_node(rval, an_eof_node);
- return rval;
- }
- inline
- void make_case_insensitive(state_match_t& state_match)
- {
- // TODO: Fix this.
- // This doesn't take into account foreign languages, figure out how to
- // do that. Also this approach is broken for this implementation of
- // wide chars.
- for (state_match_t::iterator iter = state_match.begin();
- iter != state_match.end(); ++iter)
- {
- int i, j;
- for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
- {
- // if either is set, turn them both on
- (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
- }
- }
- }
- template<bool wide_char>
- struct regex_match_helper;
- template<>
- struct regex_match_helper<false> // single byte char
- {
- template <typename DfaT, typename IteratorT>
- static bool
- do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
- int& regex_index,
- std::basic_string<
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- > *token)
- {
- typedef std::basic_string<
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- > string_type;
- typedef typename string_type::size_type size_type;
-
- node_id_t s = 0;
- node_id_t last_accepting_index = invalid_node;
- IteratorT p = first;
- IteratorT last_accepting_cpos = first;
- while (p != last)
- {
- s = dfa.transition_table[s][(uchar)*p];
- if (s == invalid_node)
- break;
- if (token) token->append((size_type)1, *p);
- ++p;
- if (dfa.acceptance_index[s] != invalid_node)
- {
- last_accepting_index = s;
- last_accepting_cpos = p;
- }
- }
- if (last_accepting_index != invalid_node)
- {
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
- dfa.acceptance_index[last_accepting_index] << '\n';
- #endif
- first = last_accepting_cpos;
- regex_index = dfa.acceptance_index[last_accepting_index];
- return true;
- }
- else
- return false;
- }
- };
- template<>
- struct regex_match_helper<true> // wide char
- {
- template <typename DfaT, typename IteratorT>
- static bool
- do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
- int& regex_index,
- std::basic_string<
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- > *token)
- {
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- char_t;
- typedef std::basic_string<char_t> string_type;
- typedef typename string_type::size_type size_type;
- node_id_t s = 0;
- node_id_t last_accepting_index = invalid_node;
- IteratorT wp = first;
- IteratorT last_accepting_cpos = first;
- while (wp != last)
- {
- for (unsigned int i = 0; i < sizeof(char_t); ++i)
- {
- s = dfa.transition_table[s][get_byte(*wp,i)];
- if (s == invalid_node)
- {
- goto break_while;
- }
- }
- if (token) token->append((size_type)1, *wp);
- ++wp;
- if (dfa.acceptance_index[s] != invalid_node)
- {
- last_accepting_index = s;
- last_accepting_cpos = wp;
- }
- }
- break_while:
- if (last_accepting_index != invalid_node)
- {
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
- dfa.acceptance_index[last_accepting_index] << '\n';
- #endif
- first = last_accepting_cpos;
- regex_index = dfa.acceptance_index[last_accepting_index];
- return true;
- }
- else
- return false;
- }
- };
- template <typename DfaT, typename IteratorT, bool wide_char>
- struct regex_match
- {
- static bool
- do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
- int& regex_index,
- std::basic_string<
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- > *token)
- {
- return regex_match_helper<wide_char>::do_match(
- dfa, first, last, regex_index, token);
- }
- };
- } // namespace lexerimpl
- ///////////////////////////////////////////////////////////////////////////////
- //
- template <typename IteratorT = char const*, typename TokenT = int,
- typename CallbackT = void(*)(IteratorT const &,
- IteratorT &,
- IteratorT const&,
- TokenT const&,
- lexer_control<TokenT> &)>
- class lexer
- {
- public:
- typedef CallbackT callback_t;
- typedef
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- char_t;
- struct regex_info
- {
- std::basic_string<char_t> str;
- TokenT token;
- CallbackT callback;
- regex_info(const std::basic_string<char_t>& _str,
- const TokenT& _token,
- const CallbackT& _callback)
- : str(_str)
- , token(_token)
- , callback(_callback)
- {}
- };
- struct dfa_table
- {
- std::vector<std::vector<node_id_t> > transition_table;
- std::vector<node_id_t> acceptance_index;
- };
- typedef std::vector<node_id_t> node_table_t;
- typedef std::vector<node_table_t> transition_table_t;
- typedef std::vector<dfa_table> dfa_t;
- lexer(unsigned int states = 1);
- void register_regex(const std::basic_string<char_t>& regex,
- const TokenT& id, const CallbackT& cb = CallbackT(),
- unsigned int state = 0);
- TokenT next_token(IteratorT &first, IteratorT const &last,
- std::basic_string<char_t> *token = 0);
- void create_dfa();
- bool has_compiled_dfa() { return m_compiled_dfa; }
-
- void set_case_insensitive(bool insensitive);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- void dump(std::ostream& out);
- #endif
- typedef std::vector<std::vector<regex_info> > regex_list_t;
- bool load (std::ifstream &in, long unique_id = 0);
- bool save (std::ofstream &out, long unique_id = 0);
- enum {
- SLEX_SIGNATURE = 0x58454C53, // "SLEX"
- SLEX_VERSION_100 = 0x0100, // persistance version
- SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
- SLEX_MINOR_VERSION_MASK = 0xFF
- };
- private:
- void create_dfa_for_state(int state);
- static bool regex_match(const dfa_t& dfa, IteratorT& first,
- IteratorT& last, int& regex_index);
- mutable std::stack<lexerimpl::node*> node_stack;
- lexerimpl::lexer_grammar g;
- mutable bool m_compiled_dfa;
- mutable dfa_t m_dfa;
- regex_list_t m_regex_list;
- bool m_case_insensitive;
- unsigned int m_state;
- std::stack<unsigned int> m_state_stack;
- unsigned int m_num_states;
- };
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline
- lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
- : g(node_stack)
- , m_compiled_dfa(false)
- , m_regex_list(states)
- , m_case_insensitive(false)
- , m_state(0)
- , m_state_stack()
- , m_num_states(states)
- {
- BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
- BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
- }
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline void
- lexer<IteratorT, TokenT, CallbackT>::register_regex(
- const std::basic_string<char_t>& regex, const TokenT& id,
- const CallbackT& callback, unsigned int state)
- {
- if (state > m_num_states) {
- m_regex_list.resize(state);
- m_num_states = state;
- }
- m_regex_list[state].push_back(regex_info(regex, id, callback));
- }
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline TokenT
- lexer<IteratorT, TokenT, CallbackT>::next_token(
- IteratorT &first, IteratorT const& last,
- std::basic_string<
- typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
- > *token)
- {
- if (!m_compiled_dfa)
- {
- create_dfa();
- }
- IteratorT saved = first;
- int regex_index;
- if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
- do_match(m_dfa[m_state], first, last, regex_index, token))
- return -1; // TODO: can't return -1, need to return some invalid token.
- // how to figure this out? We can use traits I guess.
- else
- {
- regex_info regex = m_regex_list[m_state][regex_index];
- TokenT rval = regex.token;
- if (regex.callback)
- {
- // execute corresponding callback
- lexer_control<TokenT> controller(rval, m_state, m_state_stack);
- regex.callback(saved, first, last, regex.token, controller);
- if (controller.ignore_current_token_set()) {
- if (token)
- token->erase();
- return next_token(first, last, token);
- }
- }
- return rval;
- }
- }
- namespace lexerimpl
- {
- inline
- bool find_acceptance_state(const node_set& eof_node_ids,
- const node_set& current_set,
- node_id_t& acceptance_node_id)
- {
- for(node_set::const_iterator nsi = eof_node_ids.begin();
- nsi != eof_node_ids.end(); ++nsi)
- {
- node_id_t eof_node_id =*nsi;
- if (current_set.end() != current_set.find(eof_node_id))
- {
- // store the first one we come to as the
- // matched pattern
- acceptance_node_id = eof_node_id;
- // don't bother searching for more
- return true;
- }
- }
- return false;
- }
- template <typename RegexListT, typename GrammarT>
- #ifndef BOOST_NO_CXX11_SMART_PTR
- inline std::unique_ptr<node>
- #else
- inline std::auto_ptr<node>
- #endif
- parse_regexes(const RegexListT& regex_list, GrammarT& g)
- {
- // parse the expressions into a tree
- if (regex_list.begin() == regex_list.end())
- boost::throw_exception(bad_regex());
- typename RegexListT::const_iterator ri = regex_list.begin();
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
- #else
- std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
- #endif
- if (tree.get() == 0)
- boost::throw_exception(bad_regex());
- ++ri;
- for (/**/; ri != regex_list.end(); ++ri)
- {
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
- #else
- std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
- #endif
- if (next_tree.get() == 0)
- boost::throw_exception(bad_regex());
- #ifndef BOOST_NO_CXX11_SMART_PTR
- tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
- #else
- tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
- #endif
- }
- return tree;
- }
- } //namespace lexerimpl
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline void
- lexer<IteratorT, TokenT, CallbackT>::create_dfa()
- {
- m_dfa.resize(m_num_states);
- for (unsigned int i = 0; i < m_num_states; ++i)
- create_dfa_for_state(i);
- }
- // Algorithm from Compilers: Principles, Techniques, and Tools p. 141
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline void
- lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
- {
- using lexerimpl::node;
- #ifndef BOOST_NO_CXX11_SMART_PTR
- std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
- #else
- std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
- #endif
- node_id_t dummy = 0;
- tree->assign_node_ids(dummy);
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- tree->dump(std::cout);
- #endif
- // compute followpos(root)
- followpos_t followpos;
- tree->compute_followpos(followpos);
- // the dfa states <-> nfa state groups
- std::map<node_set, node_id_t> dstates1;
- std::map<node_id_t, node_set> dstates2;
- // the dfa transitions
- m_dfa[state].transition_table.push_back(
- std::vector<node_id_t>(256, invalid_node));
- m_dfa[state].acceptance_index.push_back(invalid_node);
- // whether the dfa state has been processed yet
- std::vector<node_id_t> marked;
- // used to give a unique id to each dfa state
- node_id_t num_states = 0;
- // initially, the only unmarked state in Dstates is firstpos(root).
- marked.push_back(0);
- node_set fpr = tree->firstpos();
- dstates1[fpr] = 0;
- dstates2[0] = fpr;
- state_match_t state_match;
- tree->compute_state_match(state_match);
- if (m_case_insensitive)
- lexerimpl::make_case_insensitive(state_match);
- node_set eof_node_ids;
- tree->get_eof_ids(eof_node_ids);
- // translate the eof_node_ids into a 0-based index
- std::map<node_id_t, node_id_t> eof_node_id_map;
- unsigned int x = 0;
- for (node_set::iterator node_id_it = eof_node_ids.begin();
- node_id_it != eof_node_ids.end();
- ++node_id_it)
- {
- eof_node_id_map[*node_id_it] = x++;
- }
- // figure out if this is an acceptance state
- node_id_t eof_node_id;
- if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
- {
- m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
- }
- std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
- node_id_t(0));
- while (marked.end() != i)
- {
- *i = 1;
- node_id_t T = node_id_t(std::distance(marked.begin(), i));
- BOOST_ASSERT(T < dstates2.size());
- node_set Tstates = dstates2[T];
- for (node_id_t j = 0; j < 256; ++j)
- {
- node_set U;
- for (node_set::iterator k = Tstates.begin();
- k != Tstates.end(); ++k)
- {
- node_id_t p =*k;
- BOOST_ASSERT(p < state_match.size());
- BOOST_ASSERT(j < state_match[p].size());
- if (state_match[p][j])
- {
- node_set fpp = followpos[p];
- U.insert(fpp.begin(), fpp.end());
- }
- }
- if (U.size() > 0)
- {
- std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
- node_id_t target_state;
- if (l == dstates1.end()) // not in the states yet
- {
- ++num_states;
- dstates1[U] = target_state = num_states;
- dstates2[target_state] = U;
- marked.push_back(0);
- m_dfa[state].transition_table.push_back(
- std::vector<node_id_t>(256, invalid_node));
- m_dfa[state].acceptance_index.push_back(invalid_node);
- // figure out if this is an acceptance state
- node_id_t eof_node_id;
- if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
- {
- m_dfa[state].acceptance_index[target_state] =
- eof_node_id_map[eof_node_id];
- }
- }
- else
- {
- target_state = dstates1[U];
- }
- BOOST_ASSERT(T < m_dfa[state].transition_table.size());
- BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
- m_dfa[state].transition_table[T][j] = target_state;
- }
- }
- i = std::find(marked.begin(), marked.end(), node_id_t(0));
- }
- m_compiled_dfa = true;
- }
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline void
- lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
- {
- m_case_insensitive = insensitive;
- }
- #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline void
- lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
- {
- for (unsigned x = 0; x < m_dfa.size(); ++x)
- {
- out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
- for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
- {
- out << "state " << i << ":";
- for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
- {
- if (m_dfa[x].transition_table[i][j] != invalid_node)
- out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
- }
- out << "\n";
- }
- out << "acceptance states: ";
- for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
- {
- if (m_dfa[x].acceptance_index[k] != invalid_node)
- out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
- }
- out << endl;
- }
- }
- #endif
- ///////////////////////////////////////////////////////////////////////////////
- // load the lexer tables
- #define slex_in(strm, val) \
- strm.read((char*)&val, sizeof(val)); \
- if (std::ios::goodbit != strm.rdstate()) return false
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline bool
- lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
- {
- // ensure correct signature and version
- long signature = 0;
- slex_in (in, signature);
- if (signature != SLEX_SIGNATURE)
- return false; // not for us
- long version = 0;
- slex_in (in, version);
- if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
- return false; // to new for us
- long uid = 0;
- slex_in (in, uid);
- if (uid != unique_id)
- return false; // not saved by us
- // load auxiliary members
- int num_states = 0;
- slex_in (in, num_states);
- // load the dfa tables
- dfa_t in_dfa;
- std::size_t dfa_size = 0;
- slex_in (in, dfa_size);
- in_dfa.resize(dfa_size);
- for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
- {
- // load the transition tables
- std::size_t tt_size = 0;
- transition_table_t &tt_table = in_dfa[dfa].transition_table;
- slex_in (in, tt_size);
- tt_table.resize(tt_size);
- for (std::size_t tt = 0; tt < tt_size; ++tt)
- {
- std::size_t nt_size = 0;
- node_table_t &nt_table = tt_table[tt];
- slex_in (in, nt_size);
- nt_table.resize(nt_size);
- for (std::size_t nt = 0; nt < nt_size; ++nt)
- {
- slex_in (in, nt_table[nt]);
- }
- }
- // load the acceptance index table
- std::size_t ai_size = 0;
- node_table_t &ai_table = in_dfa[dfa].acceptance_index;
- slex_in (in, ai_size);
- ai_table.resize(ai_size);
- for (std::size_t ai = 0; ai < ai_size; ++ai)
- {
- slex_in (in, ai_table[ai]);
- }
- }
- m_dfa.swap(in_dfa); // success, swap in the read values
- m_num_states = num_states;
- m_compiled_dfa = true;
- return true;
- }
- #undef slex_in
- ///////////////////////////////////////////////////////////////////////////////
- // save the lexer tables
- #define slex_out(strm, val) \
- strm.write((char*)&val, sizeof(val)); \
- if (std::ios::goodbit != strm.rdstate()) return false
- template <typename IteratorT, typename TokenT, typename CallbackT>
- inline bool
- lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
- {
- // save signature and version information
- long out_long = SLEX_SIGNATURE;
- slex_out(out, out_long);
- out_long = SLEX_VERSION_100;
- slex_out(out, out_long);
- slex_out(out, unique_id);
- // save auxiliary members
- slex_out(out, m_num_states);
- // save the dfa tables
- typedef typename dfa_t::const_iterator dfa_iter_t;
- typedef transition_table_t::const_iterator transition_table_iter_t;
- typedef node_table_t::const_iterator node_table_iter_t;
- std::size_t out_size_t = m_dfa.size();
- slex_out(out, out_size_t);
- dfa_iter_t end = m_dfa.end();
- for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
- {
- // save the transition table
- out_size_t = (*it).transition_table.size();
- slex_out(out, out_size_t);
- transition_table_iter_t tt_end = (*it).transition_table.end();
- for (transition_table_iter_t tt_it = (*it).transition_table.begin();
- tt_it != tt_end;
- ++tt_it)
- {
- out_size_t = (*tt_it).size();
- slex_out(out, out_size_t);
- node_table_iter_t nt_end = (*tt_it).end();
- for (node_table_iter_t nt_it = (*tt_it).begin();
- nt_it != nt_end;
- ++nt_it)
- {
- slex_out(out, (*nt_it));
- }
- }
- // save the acceptance index table
- out_size_t = (*it).acceptance_index.size();
- slex_out(out, out_size_t);
- node_table_iter_t nt_end = (*it).acceptance_index.end();
- for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
- nt_it != nt_end;
- ++nt_it)
- {
- slex_out(out, (*nt_it));
- }
- }
- return true;
- }
- #undef slex_out
- /*
- a lexer_control object supports some operations on the lexer.
- get current lexer state
- set state
- terminate
- state stack (push, pop, top)
- set new token return value
- ignore the current token
- yymore
- get length of matched token
- */
- template <typename TokenT>
- class lexer_control
- {
- public:
- lexer_control(TokenT& token, unsigned int& current_state,
- std::stack<unsigned int>& state_stack);
- // functions dealing with the lexer state
- // set the state to state
- void set_state(unsigned int state);
- // get the current state
- unsigned int get_state();
- // pushes the current state onto the top of the state stack and
- // switches to new_state
- void push_state(unsigned int new_state);
- // pops the top of the state stack and switches to it.
- void pop_state();
- // returns the top of the stack without altering the stack's contents.
- unsigned int top_state();
- // functions dealing with the token returned.
- // set a new TokenT return value, overriding that one that was
- // registered via. register_regex()
- void set_token(const TokenT& token);
- // tell the lexer to return an invalid token, signifying termination.
- void terminate();
- // ignore the current token, and move on to the next one. The current
- // token will NOT be returned from next_token()
- void ignore_current_token();
- // returns true if ignore_current_token() has been called,
- // false otherwise.
- bool ignore_current_token_set();
- private:
- TokenT& m_token;
- bool m_ignore_current_token;
- unsigned int& m_current_state;
- std::stack<unsigned int>& m_state_stack;
- };
- template <typename TokenT>
- inline
- lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
- std::stack<unsigned int>& state_stack)
- : m_token(token)
- , m_ignore_current_token(false)
- , m_current_state(current_state)
- , m_state_stack(state_stack)
- {
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::set_state(unsigned int state)
- {
- m_current_state = state;
- }
- template <typename TokenT>
- inline unsigned int
- lexer_control<TokenT>::get_state()
- {
- return m_current_state;
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::push_state(unsigned int new_state)
- {
- m_state_stack.push(m_current_state);
- m_current_state = new_state;
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::pop_state()
- {
- m_current_state = m_state_stack.top();
- m_state_stack.pop();
- }
- template <typename TokenT>
- inline unsigned int
- lexer_control<TokenT>::top_state()
- {
- return m_state_stack.top();
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::set_token(const TokenT& token)
- {
- m_token = token;
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::terminate()
- {
- m_token = -1; // TOOD: fix this, need to be determined by traits
- }
- template <typename TokenT>
- inline void
- lexer_control<TokenT>::ignore_current_token()
- {
- m_ignore_current_token = true;
- }
- template <typename TokenT>
- inline bool
- lexer_control<TokenT>::ignore_current_token_set()
- {
- return m_ignore_current_token;
- }
- } // namespace classic
- } // namespace spirit
- } // namespace boost
- #undef BOOST_SPIRIT_IT_NS
- #endif
|