// regex_split.hpp (4.8 KB)
  1. /*
  2. *
  3. * Copyright (c) 1998-2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_split.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Implements regex_split and associated functions.
  16. * Note this is an internal header file included
  17. * by regex.hpp, do not include on its own.
  18. */
  19. #ifndef BOOST_REGEX_SPLIT_HPP
  20. #define BOOST_REGEX_SPLIT_HPP
  21. namespace boost{
  22. #ifdef BOOST_MSVC
  23. #pragma warning(push)
  24. #pragma warning(disable: 4103)
  25. #endif
  26. #ifdef BOOST_HAS_ABI_HEADERS
  27. # include BOOST_ABI_PREFIX
  28. #endif
  29. #ifdef BOOST_MSVC
  30. #pragma warning(pop)
  31. #endif
  32. #ifdef BOOST_MSVC
  33. # pragma warning(push)
  34. #if BOOST_MSVC < 1910
  35. #pragma warning(disable:4800)
  36. #endif
  37. #endif
  38. namespace BOOST_REGEX_DETAIL_NS{
  39. template <class charT>
  40. const basic_regex<charT>& get_default_expression(charT)
  41. {
  42. static const charT expression_text[4] = { '\\', 's', '+', '\00', };
  43. static const basic_regex<charT> e(expression_text);
  44. return e;
  45. }
  46. template <class OutputIterator, class charT, class Traits1, class Alloc1>
  47. class split_pred
  48. {
  49. typedef std::basic_string<charT, Traits1, Alloc1> string_type;
  50. typedef typename string_type::const_iterator iterator_type;
  51. iterator_type* p_last;
  52. OutputIterator* p_out;
  53. std::size_t* p_max;
  54. std::size_t initial_max;
  55. public:
  56. split_pred(iterator_type* a, OutputIterator* b, std::size_t* c)
  57. : p_last(a), p_out(b), p_max(c), initial_max(*c) {}
  58. bool operator()(const match_results<iterator_type>& what);
  59. };
  60. template <class OutputIterator, class charT, class Traits1, class Alloc1>
  61. bool split_pred<OutputIterator, charT, Traits1, Alloc1>::operator()
  62. (const match_results<iterator_type>& what)
  63. {
  64. *p_last = what[0].second;
  65. if(what.size() > 1)
  66. {
  67. // output sub-expressions only:
  68. for(unsigned i = 1; i < what.size(); ++i)
  69. {
  70. *(*p_out) = what.str(i);
  71. ++(*p_out);
  72. if(0 == --*p_max) return false;
  73. }
  74. return *p_max != 0;
  75. }
  76. else
  77. {
  78. // output $` only if it's not-null or not at the start of the input:
  79. const sub_match<iterator_type>& sub = what[-1];
  80. if((sub.first != sub.second) || (*p_max != initial_max))
  81. {
  82. *(*p_out) = sub.str();
  83. ++(*p_out);
  84. return --*p_max;
  85. }
  86. }
  87. //
  88. // initial null, do nothing:
  89. return true;
  90. }
  91. } // namespace BOOST_REGEX_DETAIL_NS
  92. template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
  93. std::size_t regex_split(OutputIterator out,
  94. std::basic_string<charT, Traits1, Alloc1>& s,
  95. const basic_regex<charT, Traits2>& e,
  96. match_flag_type flags,
  97. std::size_t max_split)
  98. {
  99. typedef typename std::basic_string<charT, Traits1, Alloc1>::const_iterator ci_t;
  100. //typedef typename match_results<ci_t>::allocator_type match_allocator;
  101. ci_t last = s.begin();
  102. std::size_t init_size = max_split;
  103. BOOST_REGEX_DETAIL_NS::split_pred<OutputIterator, charT, Traits1, Alloc1> pred(&last, &out, &max_split);
  104. ci_t i, j;
  105. i = s.begin();
  106. j = s.end();
  107. regex_grep(pred, i, j, e, flags);
  108. //
  109. // if there is still input left, do a final push as long as max_split
  110. // is not exhausted, and we're not splitting sub-expressions rather
  111. // than whitespace:
  112. if(max_split && (last != s.end()) && (e.mark_count() == 0))
  113. {
  114. *out = std::basic_string<charT, Traits1, Alloc1>((ci_t)last, (ci_t)s.end());
  115. ++out;
  116. last = s.end();
  117. --max_split;
  118. }
  119. //
  120. // delete from the string everything that has been processed so far:
  121. s.erase(0, last - s.begin());
  122. //
  123. // return the number of new records pushed:
  124. return init_size - max_split;
  125. }
  126. template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
  127. inline std::size_t regex_split(OutputIterator out,
  128. std::basic_string<charT, Traits1, Alloc1>& s,
  129. const basic_regex<charT, Traits2>& e,
  130. match_flag_type flags = match_default)
  131. {
  132. return regex_split(out, s, e, flags, UINT_MAX);
  133. }
  134. template <class OutputIterator, class charT, class Traits1, class Alloc1>
  135. inline std::size_t regex_split(OutputIterator out,
  136. std::basic_string<charT, Traits1, Alloc1>& s)
  137. {
  138. return regex_split(out, s, BOOST_REGEX_DETAIL_NS::get_default_expression(charT(0)), match_default, UINT_MAX);
  139. }
  140. #ifdef BOOST_MSVC
  141. # pragma warning(pop)
  142. #endif
  143. #ifdef BOOST_MSVC
  144. #pragma warning(push)
  145. #pragma warning(disable: 4103)
  146. #endif
  147. #ifdef BOOST_HAS_ABI_HEADERS
  148. # include BOOST_ABI_SUFFIX
  149. #endif
  150. #ifdef BOOST_MSVC
  151. #pragma warning(pop)
  152. #endif
  153. } // namespace boost
  154. #endif