boyer_moore.hpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file boyer_moore.hpp
  3. /// Contains the boyer-moore implementation. Note: this is *not* a general-
  4. /// purpose boyer-moore implementation. It truncates the search string at
  5. /// 256 characters, but it is sufficient for the needs of xpressive.
  6. //
  7. // Copyright 2008 Eric Niebler. Distributed under the Boost
  8. // Software License, Version 1.0. (See accompanying file
  9. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  10. #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
  11. #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
  12. // MS compatible compilers support #pragma once
  13. #if defined(_MSC_VER)
  14. # pragma once
  15. # pragma warning(push)
  16. # pragma warning(disable : 4100) // unreferenced formal parameter
  17. #endif
  18. #include <climits> // for UCHAR_MAX
  19. #include <cstddef> // for std::ptrdiff_t
  20. #include <utility> // for std::max
  21. #include <vector>
  22. #include <boost/mpl/bool.hpp>
  23. #include <boost/noncopyable.hpp>
  24. #include <boost/iterator/iterator_traits.hpp>
  25. #include <boost/type_traits/is_convertible.hpp>
  26. #include <boost/xpressive/detail/detail_fwd.hpp>
  27. namespace boost { namespace xpressive { namespace detail
  28. {
  29. ///////////////////////////////////////////////////////////////////////////////
  30. // boyer_moore
  31. //
  32. template<typename BidiIter, typename Traits>
  33. struct boyer_moore
  34. : noncopyable
  35. {
  36. typedef typename iterator_value<BidiIter>::type char_type;
  37. typedef Traits traits_type;
  38. typedef has_fold_case<Traits> case_fold;
  39. typedef typename Traits::string_type string_type;
  40. // initialize the Boyer-Moore search data structure, using the
  41. // search sub-sequence to prime the pump.
  42. boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
  43. : begin_(begin)
  44. , last_(begin)
  45. , fold_()
  46. , find_fun_(
  47. icase
  48. ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
  49. : &boyer_moore::find_
  50. )
  51. {
  52. std::ptrdiff_t const uchar_max = UCHAR_MAX;
  53. std::ptrdiff_t diff = std::distance(begin, end);
  54. this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max));
  55. std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
  56. --this->length_;
  57. icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
  58. }
  59. BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
  60. {
  61. return (this->*this->find_fun_)(begin, end, tr);
  62. }
  63. private:
  64. void init_(Traits const &tr, mpl::false_)
  65. {
  66. for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
  67. {
  68. this->offsets_[tr.hash(*this->last_)] = offset;
  69. }
  70. }
  71. void init_(Traits const &tr, mpl::true_)
  72. {
  73. this->fold_.reserve(this->length_ + 1);
  74. for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
  75. {
  76. this->fold_.push_back(tr.fold_case(*this->last_));
  77. for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
  78. beg != end; ++beg)
  79. {
  80. this->offsets_[tr.hash(*beg)] = offset;
  81. }
  82. }
  83. this->fold_.push_back(tr.fold_case(*this->last_));
  84. }
  85. // case-sensitive Boyer-Moore search
  86. BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
  87. {
  88. typedef typename boost::iterator_difference<BidiIter>::type diff_type;
  89. diff_type const endpos = std::distance(begin, end);
  90. diff_type offset = static_cast<diff_type>(this->length_);
  91. for(diff_type curpos = offset; curpos < endpos; curpos += offset)
  92. {
  93. std::advance(begin, offset);
  94. char_type const *pat_tmp = this->last_;
  95. BidiIter str_tmp = begin;
  96. for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
  97. {
  98. if(pat_tmp == this->begin_)
  99. {
  100. return str_tmp;
  101. }
  102. }
  103. offset = this->offsets_[tr.hash(tr.translate(*begin))];
  104. }
  105. return end;
  106. }
  107. // case-insensitive Boyer-Moore search
  108. BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
  109. {
  110. typedef typename boost::iterator_difference<BidiIter>::type diff_type;
  111. diff_type const endpos = std::distance(begin, end);
  112. diff_type offset = static_cast<diff_type>(this->length_);
  113. for(diff_type curpos = offset; curpos < endpos; curpos += offset)
  114. {
  115. std::advance(begin, offset);
  116. char_type const *pat_tmp = this->last_;
  117. BidiIter str_tmp = begin;
  118. for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
  119. {
  120. if(pat_tmp == this->begin_)
  121. {
  122. return str_tmp;
  123. }
  124. }
  125. offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
  126. }
  127. return end;
  128. }
  129. // case-insensitive Boyer-Moore search with case-folding
  130. BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
  131. {
  132. typedef typename boost::iterator_difference<BidiIter>::type diff_type;
  133. diff_type const endpos = std::distance(begin, end);
  134. diff_type offset = static_cast<diff_type>(this->length_);
  135. for(diff_type curpos = offset; curpos < endpos; curpos += offset)
  136. {
  137. std::advance(begin, offset);
  138. string_type const *pat_tmp = &this->fold_.back();
  139. BidiIter str_tmp = begin;
  140. for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
  141. --pat_tmp, --str_tmp)
  142. {
  143. if(pat_tmp == &this->fold_.front())
  144. {
  145. return str_tmp;
  146. }
  147. }
  148. offset = this->offsets_[tr.hash(*begin)];
  149. }
  150. return end;
  151. }
  152. private:
  153. char_type const *begin_;
  154. char_type const *last_;
  155. std::vector<string_type> fold_;
  156. BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
  157. unsigned char length_;
  158. unsigned char offsets_[UCHAR_MAX + 1];
  159. };
  160. }}} // namespace boost::xpressive::detail
  161. #if defined(_MSC_VER)
  162. # pragma warning(pop)
  163. #endif
  164. #endif