regex_iterator.hpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file regex_iterator.hpp
  3. /// Contains the definition of the regex_iterator type, an STL-compatible iterator
  4. /// for stepping through all the matches in a sequence.
  5. //
  6. // Copyright 2008 Eric Niebler. Distributed under the Boost
  7. // Software License, Version 1.0. (See accompanying file
  8. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef BOOST_XPRESSIVE_REGEX_ITERATOR_HPP_EAN_10_04_2005
  10. #define BOOST_XPRESSIVE_REGEX_ITERATOR_HPP_EAN_10_04_2005
  11. // MS compatible compilers support #pragma once
  12. #if defined(_MSC_VER)
  13. # pragma once
  14. #endif
  15. #include <boost/noncopyable.hpp>
  16. #include <boost/intrusive_ptr.hpp>
  17. #include <boost/iterator/iterator_traits.hpp>
  18. #include <boost/xpressive/detail/detail_fwd.hpp>
  19. #include <boost/xpressive/detail/core/access.hpp>
  20. #include <boost/xpressive/detail/utility/counted_base.hpp>
  21. namespace boost { namespace xpressive { namespace detail
  22. {
  23. //////////////////////////////////////////////////////////////////////////
  24. // regex_iterator_impl
  25. //
  26. template<typename BidiIter>
  27. struct regex_iterator_impl
  28. : counted_base<regex_iterator_impl<BidiIter> >
  29. {
  30. typedef detail::core_access<BidiIter> access;
  31. regex_iterator_impl
  32. (
  33. BidiIter begin
  34. , BidiIter cur
  35. , BidiIter end
  36. , BidiIter next_search
  37. , basic_regex<BidiIter> const &rex
  38. , regex_constants::match_flag_type flags
  39. , bool not_null = false
  40. )
  41. : rex_(rex)
  42. , what_()
  43. , state_(begin, end, what_, *access::get_regex_impl(rex_), flags)
  44. , flags_(flags)
  45. , not_null_(not_null)
  46. {
  47. this->state_.cur_ = cur;
  48. this->state_.next_search_ = next_search;
  49. }
  50. bool next()
  51. {
  52. this->state_.reset(this->what_, *access::get_regex_impl(this->rex_));
  53. if(!regex_search_impl(this->state_, this->rex_, this->not_null_))
  54. {
  55. return false;
  56. }
  57. // Report position() correctly by setting the base different from prefix().first
  58. access::set_base(this->what_, this->state_.begin_);
  59. this->state_.cur_ = this->state_.next_search_ = this->what_[0].second;
  60. this->not_null_ = (0 == this->what_.length());
  61. return true;
  62. }
  63. bool equal_to(regex_iterator_impl<BidiIter> const &that) const
  64. {
  65. return this->rex_.regex_id() == that.rex_.regex_id()
  66. && this->state_.begin_ == that.state_.begin_
  67. && this->state_.cur_ == that.state_.cur_
  68. && this->state_.end_ == that.state_.end_
  69. && this->flags_ == that.flags_
  70. ;
  71. }
  72. basic_regex<BidiIter> rex_;
  73. match_results<BidiIter> what_;
  74. match_state<BidiIter> state_;
  75. regex_constants::match_flag_type const flags_;
  76. bool not_null_;
  77. };
  78. } // namespace detail
  79. //////////////////////////////////////////////////////////////////////////
  80. // regex_iterator
  81. //
  82. template<typename BidiIter>
  83. struct regex_iterator
  84. {
  85. typedef basic_regex<BidiIter> regex_type;
  86. typedef match_results<BidiIter> value_type;
  87. typedef typename iterator_difference<BidiIter>::type difference_type;
  88. typedef value_type const *pointer;
  89. typedef value_type const &reference;
  90. typedef std::forward_iterator_tag iterator_category;
  91. /// INTERNAL ONLY
  92. typedef detail::regex_iterator_impl<BidiIter> impl_type_;
  93. regex_iterator()
  94. : impl_()
  95. {
  96. }
  97. regex_iterator
  98. (
  99. BidiIter begin
  100. , BidiIter end
  101. , basic_regex<BidiIter> const &rex
  102. , regex_constants::match_flag_type flags = regex_constants::match_default
  103. )
  104. : impl_()
  105. {
  106. if(0 != rex.regex_id()) // Empty regexes are guaranteed to match nothing
  107. {
  108. this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags);
  109. this->next_();
  110. }
  111. }
  112. template<typename LetExpr>
  113. regex_iterator
  114. (
  115. BidiIter begin
  116. , BidiIter end
  117. , basic_regex<BidiIter> const &rex
  118. , detail::let_<LetExpr> const &args
  119. , regex_constants::match_flag_type flags = regex_constants::match_default
  120. )
  121. : impl_()
  122. {
  123. if(0 != rex.regex_id()) // Empty regexes are guaranteed to match nothing
  124. {
  125. this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags);
  126. detail::bind_args(args, this->impl_->what_);
  127. this->next_();
  128. }
  129. }
  130. regex_iterator(regex_iterator<BidiIter> const &that)
  131. : impl_(that.impl_) // COW
  132. {
  133. }
  134. regex_iterator<BidiIter> &operator =(regex_iterator<BidiIter> const &that)
  135. {
  136. this->impl_ = that.impl_; // COW
  137. return *this;
  138. }
  139. friend bool operator ==(regex_iterator<BidiIter> const &left, regex_iterator<BidiIter> const &right)
  140. {
  141. if(!left.impl_ || !right.impl_)
  142. {
  143. return !left.impl_ && !right.impl_;
  144. }
  145. return left.impl_->equal_to(*right.impl_);
  146. }
  147. friend bool operator !=(regex_iterator<BidiIter> const &left, regex_iterator<BidiIter> const &right)
  148. {
  149. return !(left == right);
  150. }
  151. value_type const &operator *() const
  152. {
  153. return this->impl_->what_;
  154. }
  155. value_type const *operator ->() const
  156. {
  157. return &this->impl_->what_;
  158. }
  159. /// If what.prefix().first != what[0].second and if the element match_prev_avail is not set in
  160. /// flags then sets it. Then behaves as if by calling regex_search(what[0].second, end, what, *pre, flags),
  161. /// with the following variation: in the event that the previous match found was of zero length
  162. /// (what[0].length() == 0) then attempts to find a non-zero length match starting at what[0].second,
  163. /// only if that fails and provided what[0].second != suffix().second does it look for a (possibly
  164. /// zero length) match starting from what[0].second + 1. If no further match is found then sets
  165. /// *this equal to the end of sequence iterator.
  166. /// \post (*this)-\>size() == pre-\>mark_count() + 1
  167. /// \post (*this)-\>empty() == false
  168. /// \post (*this)-\>prefix().first == An iterator denoting the end point of the previous match found
  169. /// \post (*this)-\>prefix().last == (**this)[0].first
  170. /// \post (*this)-\>prefix().matched == (*this)-\>prefix().first != (*this)-\>prefix().second
  171. /// \post (*this)-\>suffix().first == (**this)[0].second
  172. /// \post (*this)-\>suffix().last == end
  173. /// \post (*this)-\>suffix().matched == (*this)-\>suffix().first != (*this)-\>suffix().second
  174. /// \post (**this)[0].first == The starting iterator for this match.
  175. /// \post (**this)[0].second == The ending iterator for this match.
  176. /// \post (**this)[0].matched == true if a full match was found, and false if it was a partial match (found as a result of the match_partial flag being set).
  177. /// \post (**this)[n].first == For all integers n \< (*this)-\>size(), the start of the sequence that matched sub-expression n. Alternatively, if sub-expression n did not participate in the match, then end.
  178. /// \post (**this)[n].second == For all integers n \< (*this)-\>size(), the end of the sequence that matched sub-expression n. Alternatively, if sub-expression n did not participate in the match, then end.
  179. /// \post (**this)[n].matched == For all integers n \< (*this)-\>size(), true if sub-expression n participated in the match, false otherwise.
  180. /// \post (*this)-\>position() == The distance from the start of the original sequence being iterated, to the start of this match.
  181. regex_iterator<BidiIter> &operator ++()
  182. {
  183. this->fork_(); // un-share the implementation
  184. this->next_();
  185. return *this;
  186. }
  187. regex_iterator<BidiIter> operator ++(int)
  188. {
  189. regex_iterator<BidiIter> tmp(*this);
  190. ++*this;
  191. return tmp;
  192. }
  193. private:
  194. /// INTERNAL ONLY
  195. void fork_()
  196. {
  197. if(1 != this->impl_->use_count())
  198. {
  199. // This is OK, the use_count is > 1
  200. impl_type_ *that = this->impl_.get();
  201. this->impl_ = new impl_type_
  202. (
  203. that->state_.begin_
  204. , that->state_.cur_
  205. , that->state_.end_
  206. , that->state_.next_search_
  207. , that->rex_
  208. , that->flags_
  209. , that->not_null_
  210. );
  211. detail::core_access<BidiIter>::get_action_args(this->impl_->what_)
  212. = detail::core_access<BidiIter>::get_action_args(that->what_);
  213. }
  214. }
  215. /// INTERNAL ONLY
  216. void next_()
  217. {
  218. BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count());
  219. if(!this->impl_->next())
  220. {
  221. this->impl_ = 0;
  222. }
  223. }
  224. intrusive_ptr<impl_type_> impl_;
  225. };
  226. }} // namespace boost::xpressive
  227. #endif