token_iterator.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. // (C) Copyright Gennadiy Rozental 2001.
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // (See accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. // See http://www.boost.org/libs/test for the library home page.
  6. //
  7. // File : $RCSfile$
  8. //
  9. // Version : $Revision$
  10. //
  11. // Description : token iterator for string and range tokenization
  12. // ***************************************************************************
  13. #ifndef BOOST_TEST_UTILS_TOKEN_ITERATOR_HPP
  14. #define BOOST_TEST_UTILS_TOKEN_ITERATOR_HPP
  15. // Boost
  16. #include <boost/config.hpp>
  17. #include <boost/detail/workaround.hpp>
  18. #include <boost/iterator/iterator_categories.hpp>
  19. #include <boost/iterator/iterator_traits.hpp>
  20. #include <boost/test/utils/iterator/input_iterator_facade.hpp>
  21. #include <boost/test/utils/basic_cstring/basic_cstring.hpp>
  22. #include <boost/test/utils/named_params.hpp>
  23. #include <boost/test/utils/foreach.hpp>
  24. // STL
  25. #include <iosfwd>
  26. #include <cctype>
  27. #include <boost/test/detail/suppress_warnings.hpp>
  28. //____________________________________________________________________________//
  29. #ifdef BOOST_NO_STDC_NAMESPACE
  30. namespace std{ using ::ispunct; using ::isspace; }
  31. #endif
  32. namespace boost {
  33. namespace unit_test {
  34. namespace utils {
  35. // ************************************************************************** //
  36. // ************** ti_delimeter_type ************** //
  37. // ************************************************************************** //
  // Selects the rule used to decide whether a character is a delimiter.
  enum ti_delimeter_type {
      dt_char    = 0, // delimiter if the character is in an explicit list of characters
      dt_ispunct = 1, // delimiter if the character satisfies ispunct
      dt_isspace = 2, // delimiter if the character satisfies isspace
      dt_none    = 3  // no character is a delimiter
  };
  44. namespace ut_detail {
  45. // ************************************************************************** //
  46. // ************** default_char_compare ************** //
  47. // ************************************************************************** //
  // Default character comparison functor: two characters are equal when the
  // character traits' eq() for CharT reports them equal.
  template<typename CharT>
  class default_char_compare {
  public:
      // Returns true if c1 and c2 compare equal.
      // The functor holds no state, so the call operator is const; this lets
      // it be invoked through const objects and const references as well as
      // through temporaries.
      bool operator()( CharT c1, CharT c2 ) const
      {
  #ifdef BOOST_CLASSIC_IOSTREAMS
          // classic iostreams builds name the traits class string_char_traits
          return std::string_char_traits<CharT>::eq( c1, c2 );
  #else
          return std::char_traits<CharT>::eq( c1, c2 );
  #endif
      }
  };
  60. // ************************************************************************** //
  61. // ************** delim_policy ************** //
  62. // ************************************************************************** //
  63. template<typename CharT,typename CharCompare>
  64. class delim_policy {
  65. typedef basic_cstring<CharT const> cstring;
  66. public:
  67. // Constructor
  68. explicit delim_policy( ti_delimeter_type type_ = dt_char, cstring delimeters_ = cstring() )
  69. : m_type( type_ )
  70. {
  71. set_delimeters( delimeters_ );
  72. }
  73. void set_delimeters( ti_delimeter_type type_ ) { m_type = type_; }
  74. void set_delimeters( cstring delimeters_ )
  75. {
  76. m_delimeters = delimeters_;
  77. if( !m_delimeters.is_empty() )
  78. m_type = dt_char;
  79. }
  80. void set_delimeters( nfp::nil ) {}
  81. bool operator()( CharT c )
  82. {
  83. switch( m_type ) {
  84. case dt_char: {
  85. BOOST_TEST_FOREACH( CharT, delim, m_delimeters )
  86. if( CharCompare()( delim, c ) )
  87. return true;
  88. return false;
  89. }
  90. case dt_ispunct:
  91. return (std::ispunct)( c ) != 0;
  92. case dt_isspace:
  93. return (std::isspace)( c ) != 0;
  94. case dt_none:
  95. return false;
  96. }
  97. return false;
  98. }
  99. private:
  100. // Data members
  101. cstring m_delimeters;
  102. ti_delimeter_type m_type;
  103. };
  104. // ************************************************************************** //
  105. // ************** token_assigner ************** //
  106. // ************************************************************************** //
  107. template<typename TraversalTag>
  108. struct token_assigner {
  109. #if BOOST_WORKAROUND( BOOST_DINKUMWARE_STDLIB, < 306 )
  110. template<typename Iterator, typename C, typename T>
  111. static void assign( Iterator b, Iterator e, std::basic_string<C,T>& t )
  112. { for( ; b != e; ++b ) t += *b; }
  113. template<typename Iterator, typename C>
  114. static void assign( Iterator b, Iterator e, basic_cstring<C>& t ) { t.assign( b, e ); }
  115. #else
  116. template<typename Iterator, typename Token>
  117. static void assign( Iterator b, Iterator e, Token& t ) { t.assign( b, e ); }
  118. #endif
  119. template<typename Iterator, typename Token>
  120. static void append_move( Iterator& b, Token& ) { ++b; }
  121. };
  122. //____________________________________________________________________________//
  123. template<>
  124. struct token_assigner<single_pass_traversal_tag> {
  125. template<typename Iterator, typename Token>
  126. static void assign( Iterator /*b*/, Iterator /*e*/, Token& /*t*/ ) {}
  127. template<typename Iterator, typename Token>
  128. static void append_move( Iterator& b, Token& t ) { t += *b; ++b; }
  129. };
  130. } // namespace ut_detail
  131. // ************************************************************************** //
  132. // ************** modifiers ************** //
  133. // ************************************************************************** //
  134. namespace {
  135. nfp::keyword<struct dropped_delimeters_t > dropped_delimeters;
  136. nfp::keyword<struct kept_delimeters_t > kept_delimeters;
  137. nfp::typed_keyword<bool,struct keep_empty_tokens_t > keep_empty_tokens;
  138. nfp::typed_keyword<std::size_t,struct max_tokens_t > max_tokens;
  139. }
  140. // ************************************************************************** //
  141. // ************** token_iterator_base ************** //
  142. // ************************************************************************** //
  143. template<typename Derived,
  144. typename CharT,
  145. typename CharCompare = ut_detail::default_char_compare<CharT>,
  146. typename ValueType = basic_cstring<CharT const>,
  147. typename Reference = basic_cstring<CharT const>,
  148. typename Traversal = forward_traversal_tag>
  149. class token_iterator_base
  150. : public input_iterator_facade<Derived,ValueType,Reference,Traversal> {
  151. typedef basic_cstring<CharT const> cstring;
  152. typedef ut_detail::delim_policy<CharT,CharCompare> delim_policy;
  153. typedef input_iterator_facade<Derived,ValueType,Reference,Traversal> base;
  154. protected:
  155. // Constructor
  156. explicit token_iterator_base()
  157. : m_is_dropped( dt_isspace )
  158. , m_is_kept( dt_ispunct )
  159. , m_keep_empty_tokens( false )
  160. , m_tokens_left( static_cast<std::size_t>(-1) )
  161. , m_token_produced( false )
  162. {
  163. }
  164. template<typename Modifier>
  165. void
  166. apply_modifier( Modifier const& m )
  167. {
  168. if( m.has( dropped_delimeters ) )
  169. m_is_dropped.set_delimeters( m[dropped_delimeters] );
  170. if( m.has( kept_delimeters ) )
  171. m_is_kept.set_delimeters( m[kept_delimeters] );
  172. if( m.has( keep_empty_tokens ) )
  173. m_keep_empty_tokens = true;
  174. nfp::opt_assign( m_tokens_left, m, max_tokens );
  175. }
  176. template<typename Iter>
  177. bool get( Iter& begin, Iter end )
  178. {
  179. typedef ut_detail::token_assigner<BOOST_DEDUCED_TYPENAME iterator_traversal<Iter>::type> Assigner;
  180. Iter check_point;
  181. this->m_value.clear();
  182. if( !m_keep_empty_tokens ) {
  183. while( begin != end && m_is_dropped( *begin ) )
  184. ++begin;
  185. if( begin == end )
  186. return false;
  187. check_point = begin;
  188. if( m_tokens_left == 1 )
  189. while( begin != end )
  190. Assigner::append_move( begin, this->m_value );
  191. else if( m_is_kept( *begin ) )
  192. Assigner::append_move( begin, this->m_value );
  193. else
  194. while( begin != end && !m_is_dropped( *begin ) && !m_is_kept( *begin ) )
  195. Assigner::append_move( begin, this->m_value );
  196. --m_tokens_left;
  197. }
  198. else { // m_keep_empty_tokens is true
  199. check_point = begin;
  200. if( begin == end ) {
  201. if( m_token_produced )
  202. return false;
  203. m_token_produced = true;
  204. }
  205. if( m_is_kept( *begin ) ) {
  206. if( m_token_produced )
  207. Assigner::append_move( begin, this->m_value );
  208. m_token_produced = !m_token_produced;
  209. }
  210. else if( !m_token_produced && m_is_dropped( *begin ) )
  211. m_token_produced = true;
  212. else {
  213. if( m_is_dropped( *begin ) )
  214. check_point = ++begin;
  215. while( begin != end && !m_is_dropped( *begin ) && !m_is_kept( *begin ) )
  216. Assigner::append_move( begin, this->m_value );
  217. m_token_produced = true;
  218. }
  219. }
  220. Assigner::assign( check_point, begin, this->m_value );
  221. return true;
  222. }
  223. private:
  224. // Data members
  225. delim_policy m_is_dropped;
  226. delim_policy m_is_kept;
  227. bool m_keep_empty_tokens;
  228. std::size_t m_tokens_left;
  229. bool m_token_produced;
  230. };
  231. // ************************************************************************** //
  232. // ************** basic_string_token_iterator ************** //
  233. // ************************************************************************** //
  234. template<typename CharT,
  235. typename CharCompare = ut_detail::default_char_compare<CharT> >
  236. class basic_string_token_iterator
  237. : public token_iterator_base<basic_string_token_iterator<CharT,CharCompare>,CharT,CharCompare> {
  238. typedef basic_cstring<CharT const> cstring;
  239. typedef token_iterator_base<basic_string_token_iterator<CharT,CharCompare>,CharT,CharCompare> base;
  240. public:
  241. explicit basic_string_token_iterator() {}
  242. explicit basic_string_token_iterator( cstring src )
  243. : m_src( src )
  244. {
  245. this->init();
  246. }
  247. // warning: making the constructor accept anything else than a cstring should
  248. // ensure that no temporary object is created during string creation (previous
  249. // definition was "template<typename Src, typename Modifier> basic_string_token_iterator( Src src ..."
  250. // which may create a temporary string copy when called with an std::string.
  251. template<typename Modifier>
  252. basic_string_token_iterator( cstring src, Modifier const& m )
  253. : m_src( src )
  254. {
  255. this->apply_modifier( m );
  256. this->init();
  257. }
  258. private:
  259. friend class input_iterator_core_access;
  260. // input iterator implementation
  261. bool get()
  262. {
  263. typename cstring::iterator begin = m_src.begin();
  264. bool res = base::get( begin, m_src.end() );
  265. m_src.assign( begin, m_src.end() );
  266. return res;
  267. }
  268. // Data members
  269. cstring m_src;
  270. };
  271. typedef basic_string_token_iterator<char> string_token_iterator;
  272. typedef basic_string_token_iterator<wchar_t> wstring_token_iterator;
  273. // ************************************************************************** //
  274. // ************** range_token_iterator ************** //
  275. // ************************************************************************** //
  276. template<typename Iter,
  277. typename CharCompare = ut_detail::default_char_compare<BOOST_DEDUCED_TYPENAME iterator_value<Iter>::type>,
  278. typename ValueType = std::basic_string<BOOST_DEDUCED_TYPENAME iterator_value<Iter>::type>,
  279. typename Reference = ValueType const&>
  280. class range_token_iterator
  281. : public token_iterator_base<range_token_iterator<Iter,CharCompare,ValueType,Reference>,
  282. typename iterator_value<Iter>::type,CharCompare,ValueType,Reference> {
  283. typedef basic_cstring<typename ValueType::value_type> cstring;
  284. typedef token_iterator_base<range_token_iterator<Iter,CharCompare,ValueType,Reference>,
  285. typename iterator_value<Iter>::type,CharCompare,ValueType,Reference> base;
  286. public:
  287. explicit range_token_iterator() {}
  288. explicit range_token_iterator( Iter begin, Iter end = Iter() )
  289. : m_begin( begin ), m_end( end )
  290. {
  291. this->init();
  292. }
  293. range_token_iterator( range_token_iterator const& rhs )
  294. : base( rhs )
  295. {
  296. if( this->m_valid ) {
  297. m_begin = rhs.m_begin;
  298. m_end = rhs.m_end;
  299. }
  300. }
  301. template<typename Modifier>
  302. range_token_iterator( Iter begin, Iter end, Modifier const& m )
  303. : m_begin( begin ), m_end( end )
  304. {
  305. this->apply_modifier( m );
  306. this->init();
  307. }
  308. private:
  309. friend class input_iterator_core_access;
  310. // input iterator implementation
  311. bool get()
  312. {
  313. return base::get( m_begin, m_end );
  314. }
  315. // Data members
  316. Iter m_begin;
  317. Iter m_end;
  318. };
  319. // ************************************************************************** //
  320. // ************** make_range_token_iterator ************** //
  321. // ************************************************************************** //
  322. template<typename Iter>
  323. inline range_token_iterator<Iter>
  324. make_range_token_iterator( Iter begin, Iter end = Iter() )
  325. {
  326. return range_token_iterator<Iter>( begin, end );
  327. }
  328. //____________________________________________________________________________//
  329. template<typename Iter,typename Modifier>
  330. inline range_token_iterator<Iter>
  331. make_range_token_iterator( Iter begin, Iter end, Modifier const& m )
  332. {
  333. return range_token_iterator<Iter>( begin, end, m );
  334. }
  335. //____________________________________________________________________________//
  336. } // namespace utils
  337. } // namespace unit_test
  338. } // namespace boost
  339. //____________________________________________________________________________//
  340. #include <boost/test/detail/enable_warnings.hpp>
  341. #endif // BOOST_TEST_UTILS_TOKEN_ITERATOR_HPP